import { promises as fs } from 'fs';
import * as path from 'path';
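// Lightweight, regex-based extraction of language-specific constructs (decorators, traits,
// protocols, sealed hierarchies, and so on). The analyzers below are best-effort heuristics,
// not full parsers: bodies are matched with simple patterns, so heavily nested or unusually
// formatted code may be reported incompletely.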
export interface LanguageSpecificFeatures {
decorators?: Array<{ name: string; target: string; parameters?: any[] }>;
annotations?: Array<{ name: string; target: string; parameters?: any[] }>;
interfaces?: Array<{ name: string; methods: string[]; properties: string[] }>;
traits?: Array<{ name: string; methods: string[]; associatedTypes?: string[] }>;
protocols?: Array<{ name: string; requirements: string[] }>;
extensions?: Array<{ target: string; methods: string[] }>;
enums?: Array<{ name: string; cases: string[]; rawType?: string }>;
generics?: Array<{ name: string; constraints?: string[] }>;
macros?: Array<{ name: string; type: string }>;
mixins?: Array<{ name: string; methods: string[] }>;
dataClasses?: Array<{ name: string; fields: Array<{ name: string; type: string }> }>;
sealedClasses?: Array<{ name: string; subclasses: string[] }>;
}
export abstract class LanguageSpecificAnalyzer {
abstract canHandle(filePath: string): boolean;
abstract analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures>;
}
// Python Decorator Analyzer
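// Extracts decorators (with their targets and raw parameter strings), the field lists of
// @dataclass-annotated classes, and the members of Enum subclasses.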
export class PythonDecoratorAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
return ['.py', '.pyw'].includes(path.extname(filePath).toLowerCase());
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
decorators: [],
dataClasses: [],
enums: []
};
// Analyze decorators (dotted names such as @functools.lru_cache are accepted;
// for stacked decorators only the one directly above the def/class is captured)
const decoratorRegex = /@([\w.]+)(?:\(([^)]*)\))?\s*\n\s*(?:def|class)\s+(\w+)/g;
let match;
while ((match = decoratorRegex.exec(content)) !== null) {
const decoratorName = match[1];
const params = match[2] ? this.parseParameters(match[2]) : [];
const targetName = match[3];
features.decorators!.push({
name: decoratorName,
target: targetName,
parameters: params
});
// Check for dataclass (both @dataclass and @dataclasses.dataclass)
if (decoratorName === 'dataclass' || decoratorName.endsWith('.dataclass')) {
const dataclassFields = this.extractDataclassFields(content, targetName);
features.dataClasses!.push({
name: targetName,
fields: dataclassFields
});
}
}
// Analyze enums (Enum, IntEnum, StrEnum, Flag, etc., optionally qualified as enum.Enum)
const enumRegex = /class\s+(\w+)\s*\(\s*(?:enum\.)?\w*(?:Enum|Flag)\s*\):/g;
while ((match = enumRegex.exec(content)) !== null) {
const enumName = match[1];
const cases = this.extractEnumCases(content, match.index);
features.enums!.push({
name: enumName,
cases
});
}
return features;
}
private parseParameters(params: string): any[] {
return params.split(',').map(p => p.trim()).filter(p => p);
}
private extractDataclassFields(content: string, className: string): Array<{ name: string; type: string }> {
const classRegex = new RegExp(`class\\s+${className}[^:]*:\\s*\\n`);
const classMatch = classRegex.exec(content);
if (!classMatch) return [];
const fields: Array<{ name: string; type: string }> = [];
const fieldRegex = /^\s+(\w+)\s*:\s*([^\n=]+)(?:\s*=\s*[^\n]+)?$/;
const searchStart = classMatch.index + classMatch[0].length;
// Walk the class body line by line; the first non-empty, non-indented line ends the class
const bodyLines = content.substring(searchStart).split('\n');
for (const line of bodyLines) {
if (line.trim() && !line.startsWith(' ') && !line.startsWith('\t')) break;
const match = fieldRegex.exec(line);
if (match) {
fields.push({
name: match[1],
type: match[2].trim()
});
}
}
return fields;
}
private extractEnumCases(content: string, startIndex: number): string[] {
const cases: string[] = [];
// startIndex points at the `class ... (Enum):` line itself, so skip it and read the indented body
const lines = content.substring(startIndex).split('\n').slice(1);
for (const line of lines) {
const caseMatch = line.match(/^\s+(\w+)\s*=/);
if (caseMatch) {
cases.push(caseMatch[1]);
} else if (line.trim() && !line.startsWith(' ') && !line.startsWith('\t')) {
break; // a non-indented, non-empty line ends the enum body
}
}
return cases;
}
}
// Java Annotation Analyzer
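// Extracts annotations and the declarations they precede, interface method and constant names,
// and the constants of simple enums (enum bodies containing nested braces are truncated at the
// first closing brace).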
export class JavaAnnotationAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
return path.extname(filePath).toLowerCase() === '.java';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
annotations: [],
interfaces: [],
enums: []
};
// Analyze annotations
const annotationRegex = /@(\w+)(?:\(([^)]*)\))?\s*\n?\s*(?:public\s+)?(?:private\s+)?(?:protected\s+)?(?:static\s+)?(?:final\s+)?(?:class|interface|enum|(?:\w+\s+)?(\w+)\s*\()/g;
let match;
while ((match = annotationRegex.exec(content)) !== null) {
const annotationName = match[1];
const params = match[2] ? this.parseAnnotationParameters(match[2]) : [];
const targetName = match[3] || this.findNextIdentifier(content, match.index + match[0].length);
features.annotations!.push({
name: annotationName,
target: targetName,
parameters: params
});
}
// Analyze interfaces
const interfaceRegex = /(?:public\s+)?interface\s+(\w+)(?:<[^>]+>)?\s*(?:extends\s+[^{]+)?\s*\{/g;
while ((match = interfaceRegex.exec(content)) !== null) {
const interfaceName = match[1];
const interfaceBody = this.extractBlock(content, match.index + match[0].length - 1);
const methods = this.extractInterfaceMethods(interfaceBody);
const properties = this.extractInterfaceProperties(interfaceBody);
features.interfaces!.push({
name: interfaceName,
methods,
properties
});
}
// Analyze enums
const enumRegex = /(?:public\s+)?enum\s+(\w+)\s*\{([^}]+)\}/g;
while ((match = enumRegex.exec(content)) !== null) {
const enumName = match[1];
const enumBody = match[2];
// Keep only the constant names; drop constructor arguments, trailing semicolons, and anything after them
const cases = enumBody.split(',').map(c => c.trim().split(/[\s(;]+/)[0]).filter(c => /^[A-Za-z_]\w*$/.test(c));
features.enums!.push({
name: enumName,
cases
});
}
return features;
}
private parseAnnotationParameters(params: string): any[] {
return params.split(',').map(p => {
const [key, value] = p.split('=').map(s => s.trim());
return value ? { key, value } : key;
});
}
private findNextIdentifier(content: string, startIndex: number): string {
const slice = content.substring(startIndex, startIndex + 100);
const match = slice.match(/(\w+)/);
return match ? match[1] : 'unknown';
}
private extractBlock(content: string, startIndex: number): string {
let braceCount = 1;
let result = '';
for (let i = startIndex + 1; i < content.length; i++) {
const char = content[i];
if (char === '{') braceCount++;
else if (char === '}') {
braceCount--;
if (braceCount === 0) break;
}
result += char;
}
return result;
}
private extractInterfaceMethods(body: string): string[] {
const methods: string[] = [];
const methodRegex = /(?:default\s+)?(?:\w+(?:<[^>]+>)?)\s+(\w+)\s*\([^)]*\)/g;
let match;
while ((match = methodRegex.exec(body)) !== null) {
methods.push(match[1]);
}
return methods;
}
private extractInterfaceProperties(body: string): string[] {
const properties: string[] = [];
const propertyRegex = /(?:public\s+)?(?:static\s+)?(?:final\s+)?(\w+)\s+(\w+)\s*[;=]/g;
let match;
while ((match = propertyRegex.exec(body)) !== null) {
properties.push(match[2]);
}
return properties;
}
}
// Go Interface Analyzer
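// Extracts interface method sets. Only lines of the form `Name(...)` inside the interface body
// are counted, so embedded interfaces are ignored.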
export class GoInterfaceAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
return path.extname(filePath).toLowerCase() === '.go';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
interfaces: []
};
// Analyze interfaces
const interfaceRegex = /type\s+(\w+)\s+interface\s*\{([^}]*)\}/g;
let match;
while ((match = interfaceRegex.exec(content)) !== null) {
const interfaceName = match[1];
const interfaceBody = match[2];
const methods = this.extractInterfaceMethods(interfaceBody);
features.interfaces!.push({
name: interfaceName,
methods,
properties: [] // Go interfaces don't have properties
});
}
return features;
}
private extractInterfaceMethods(body: string): string[] {
const methods: string[] = [];
const lines = body.split('\n').map(l => l.trim()).filter(l => l);
for (const line of lines) {
const methodMatch = line.match(/^(\w+)\s*\(/);
if (methodMatch) {
methods.push(methodMatch[1]);
}
}
return methods;
}
}
// Rust Trait Analyzer
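// Extracts traits (method names and associated types), enum variants, macro_rules! definitions,
// and proc-macro entry points. Trait and enum bodies are matched with [^}]*, so items containing
// nested braces (default method bodies, struct-like variants) are truncated at the first `}`.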
export class RustTraitAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
return path.extname(filePath).toLowerCase() === '.rs';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
traits: [],
enums: [],
macros: []
};
// Analyze traits
const traitRegex = /(?:pub\s+)?trait\s+(\w+)(?:<[^>]+>)?\s*(?::\s*[^{]+)?\s*\{([^}]*)\}/g;
let match;
while ((match = traitRegex.exec(content)) !== null) {
const traitName = match[1];
const traitBody = match[2];
const methods = this.extractTraitMethods(traitBody);
const associatedTypes = this.extractAssociatedTypes(traitBody);
features.traits!.push({
name: traitName,
methods,
associatedTypes
});
}
// Analyze enums
const enumRegex = /(?:pub\s+)?enum\s+(\w+)(?:<[^>]+>)?\s*\{([^}]*)\}/g;
while ((match = enumRegex.exec(content)) !== null) {
const enumName = match[1];
const enumBody = match[2];
const cases = this.extractEnumVariants(enumBody);
features.enums!.push({
name: enumName,
cases
});
}
// Analyze macros
const macroRegex = /macro_rules!\s+(\w+)\s*\{/g;
while ((match = macroRegex.exec(content)) !== null) {
features.macros!.push({
name: match[1],
type: 'macro_rules'
});
}
// proc_macro_derive/attribute attributes usually carry arguments, e.g. #[proc_macro_derive(MyTrait)]
const procMacroRegex = /#\[proc_macro(?:_derive|_attribute)?(?:\([^)]*\))?\]\s*pub\s+fn\s+(\w+)/g;
while ((match = procMacroRegex.exec(content)) !== null) {
features.macros!.push({
name: match[1],
type: 'proc_macro'
});
}
return features;
}
private extractTraitMethods(body: string): string[] {
const methods: string[] = [];
const methodRegex = /fn\s+(\w+)\s*(?:<[^>]+>)?\s*\([^)]*\)/g;
let match;
while ((match = methodRegex.exec(body)) !== null) {
methods.push(match[1]);
}
return methods;
}
private extractAssociatedTypes(body: string): string[] {
const types: string[] = [];
const typeRegex = /type\s+(\w+)(?:\s*:\s*[^;]+)?;/g;
let match;
while ((match = typeRegex.exec(body)) !== null) {
types.push(match[1]);
}
return types;
}
private extractEnumVariants(body: string): string[] {
const variants: string[] = [];
// Drop tuple-variant payloads such as `(u32, String)` so the commas inside them don't split variants apart
const cleaned = body.replace(/\([^)]*\)/g, '');
const lines = cleaned.split(',').map(l => l.trim()).filter(l => l);
for (const line of lines) {
const variantMatch = line.match(/^(\w+)/);
if (variantMatch) {
variants.push(variantMatch[1]);
}
}
return variants;
}
}
// Swift Protocol & Extension Analyzer
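// Extracts protocol func/var requirements, extension methods, enums (with optional raw types),
// and generic parameters together with their constraints.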
export class SwiftAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
return path.extname(filePath).toLowerCase() === '.swift';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
protocols: [],
extensions: [],
enums: [],
generics: []
};
// Analyze protocols
const protocolRegex = /protocol\s+(\w+)(?:\s*:\s*[^{]+)?\s*\{([^}]*)\}/g;
let match;
while ((match = protocolRegex.exec(content)) !== null) {
const protocolName = match[1];
const protocolBody = match[2];
const requirements = this.extractProtocolRequirements(protocolBody);
features.protocols!.push({
name: protocolName,
requirements
});
}
// Analyze extensions
const extensionRegex = /extension\s+(\w+)(?:\s*:\s*[^{]+)?\s*(?:where\s+[^{]+)?\s*\{([^}]*)\}/g;
while ((match = extensionRegex.exec(content)) !== null) {
const targetType = match[1];
const extensionBody = match[2];
const methods = this.extractExtensionMethods(extensionBody);
features.extensions!.push({
target: targetType,
methods
});
}
// Analyze enums
const enumRegex = /enum\s+(\w+)(?:\s*:\s*(\w+))?\s*\{([^}]*)\}/g;
while ((match = enumRegex.exec(content)) !== null) {
const enumName = match[1];
const rawType = match[2];
const enumBody = match[3];
const cases = this.extractEnumCases(enumBody);
features.enums!.push({
name: enumName,
cases,
rawType
});
}
// Analyze generics
const genericRegex = /(?:func|class|struct|enum)\s+\w+<([^>]+)>/g;
while ((match = genericRegex.exec(content)) !== null) {
const genericParams = match[1];
const params = this.parseGenericParameters(genericParams);
params.forEach(param => {
features.generics!.push(param);
});
}
return features;
}
private extractProtocolRequirements(body: string): string[] {
const requirements: string[] = [];
const lines = body.split('\n').map(l => l.trim()).filter(l => l);
for (const line of lines) {
const funcMatch = line.match(/func\s+(\w+)/);
const varMatch = line.match(/var\s+(\w+)/);
if (funcMatch) {
requirements.push(`func ${funcMatch[1]}`);
} else if (varMatch) {
requirements.push(`var ${varMatch[1]}`);
}
}
return requirements;
}
private extractExtensionMethods(body: string): string[] {
const methods: string[] = [];
const methodRegex = /func\s+(\w+)\s*(?:<[^>]+>)?\s*\([^)]*\)/g;
let match;
while ((match = methodRegex.exec(body)) !== null) {
methods.push(match[1]);
}
return methods;
}
private extractEnumCases(body: string): string[] {
const cases: string[] = [];
const caseRegex = /case\s+(\w+)(?:\s*\([^)]*\))?/g;
let match;
while ((match = caseRegex.exec(body)) !== null) {
cases.push(match[1]);
}
return cases;
}
private parseGenericParameters(params: string): Array<{ name: string; constraints?: string[] }> {
const result: Array<{ name: string; constraints?: string[] }> = [];
const parts = params.split(',').map(p => p.trim());
for (const part of parts) {
const [name, ...constraints] = part.split(':').map(s => s.trim());
result.push({
name,
constraints: constraints.length > 0 ? constraints : undefined
});
}
return result;
}
}
// Kotlin Data Class & Sealed Class Analyzer
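// Extracts data class constructor properties, sealed hierarchies and their direct subclasses,
// interface members, and annotations attached to declarations.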
export class KotlinAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
const ext = path.extname(filePath).toLowerCase();
return ext === '.kt' || ext === '.kts';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
dataClasses: [],
sealedClasses: [],
interfaces: [],
annotations: []
};
// Analyze data classes
const dataClassRegex = /data\s+class\s+(\w+)\s*\(([^)]*)\)/g;
let match;
while ((match = dataClassRegex.exec(content)) !== null) {
const className = match[1];
const params = match[2];
const fields = this.parseDataClassFields(params);
features.dataClasses!.push({
name: className,
fields
});
}
// Analyze sealed classes and sealed interfaces
const sealedClassRegex = /sealed\s+(?:class|interface)\s+(\w+)/g;
while ((match = sealedClassRegex.exec(content)) !== null) {
const sealedClassName = match[1];
const subclasses = this.findSealedSubclasses(content, sealedClassName);
features.sealedClasses!.push({
name: sealedClassName,
subclasses
});
}
// Analyze interfaces
const interfaceRegex = /interface\s+(\w+)(?:<[^>]+>)?\s*(?::\s*[^{]+)?\s*\{([^}]*)\}/g;
while ((match = interfaceRegex.exec(content)) !== null) {
const interfaceName = match[1];
const interfaceBody = match[2];
const methods = this.extractKotlinInterfaceMethods(interfaceBody);
const properties = this.extractKotlinInterfaceProperties(interfaceBody);
features.interfaces!.push({
name: interfaceName,
methods,
properties
});
}
// Analyze annotations
const annotationRegex = /@(\w+)(?:\(([^)]*)\))?\s*(?:class|fun|val|var)\s+(\w+)/g;
while ((match = annotationRegex.exec(content)) !== null) {
features.annotations!.push({
name: match[1],
target: match[3],
parameters: match[2] ? this.parseAnnotationParams(match[2]) : []
});
}
return features;
}
private parseDataClassFields(params: string): Array<{ name: string; type: string }> {
const fields: Array<{ name: string; type: string }> = [];
const fieldRegex = /(?:val|var)\s+(\w+)\s*:\s*([^,=]+)(?:\s*=\s*[^,]+)?/g;
let match;
while ((match = fieldRegex.exec(params)) !== null) {
fields.push({
name: match[1],
type: match[2].trim()
});
}
return fields;
}
private findSealedSubclasses(content: string, sealedClassName: string): string[] {
const subclasses: string[] = [];
const subclassRegex = new RegExp(`(?:class|object)\\s+(\\w+)\\s*(?:\\([^)]*\\))?\\s*:\\s*${sealedClassName}`, 'g');
let match;
while ((match = subclassRegex.exec(content)) !== null) {
subclasses.push(match[1]);
}
return subclasses;
}
private extractKotlinInterfaceMethods(body: string): string[] {
const methods: string[] = [];
const methodRegex = /fun\s+(\w+)\s*(?:<[^>]+>)?\s*\([^)]*\)/g;
let match;
while ((match = methodRegex.exec(body)) !== null) {
methods.push(match[1]);
}
return methods;
}
private extractKotlinInterfaceProperties(body: string): string[] {
const properties: string[] = [];
const propertyRegex = /(?:val|var)\s+(\w+)\s*:/g;
let match;
while ((match = propertyRegex.exec(body)) !== null) {
properties.push(match[1]);
}
return properties;
}
private parseAnnotationParams(params: string): any[] {
return params.split(',').map(p => p.trim());
}
}
// Scala Trait & Case Class Analyzer
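// Extracts traits (each trait is also recorded as a potential mixin), case class fields, and
// sealed trait/abstract class hierarchies with their direct implementations.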
export class ScalaAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
return path.extname(filePath).toLowerCase() === '.scala';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
traits: [],
dataClasses: [], // Case classes in Scala
sealedClasses: [],
mixins: []
};
// Analyze traits
const traitRegex = /trait\s+(\w+)(?:\[[^\]]+\])?\s*(?:extends\s+[^{]+)?\s*\{([^}]*)\}/g;
let match;
while ((match = traitRegex.exec(content)) !== null) {
const traitName = match[1];
const traitBody = match[2];
const methods = this.extractTraitMethods(traitBody);
features.traits!.push({
name: traitName,
methods,
associatedTypes: []
});
// Traits can also be mixins
features.mixins!.push({
name: traitName,
methods
});
}
// Analyze case classes
const caseClassRegex = /case\s+class\s+(\w+)\s*\(([^)]*)\)/g;
while ((match = caseClassRegex.exec(content)) !== null) {
const className = match[1];
const params = match[2];
const fields = this.parseCaseClassFields(params);
features.dataClasses!.push({
name: className,
fields
});
}
// Analyze sealed traits/classes
const sealedRegex = /sealed\s+(?:trait|abstract\s+class)\s+(\w+)/g;
while ((match = sealedRegex.exec(content)) !== null) {
const sealedName = match[1];
const implementations = this.findSealedImplementations(content, sealedName);
features.sealedClasses!.push({
name: sealedName,
subclasses: implementations
});
}
return features;
}
private extractTraitMethods(body: string): string[] {
const methods: string[] = [];
const methodRegex = /def\s+(\w+)(?:\[[^\]]+\])?\s*(?:\([^)]*\))?/g;
let match;
while ((match = methodRegex.exec(body)) !== null) {
methods.push(match[1]);
}
return methods;
}
private parseCaseClassFields(params: string): Array<{ name: string; type: string }> {
const fields: Array<{ name: string; type: string }> = [];
const fieldRegex = /(\w+)\s*:\s*([^,=]+)(?:\s*=\s*[^,]+)?/g;
let match;
while ((match = fieldRegex.exec(params)) !== null) {
fields.push({
name: match[1],
type: match[2].trim()
});
}
return fields;
}
private findSealedImplementations(content: string, sealedName: string): string[] {
const implementations: string[] = [];
const implRegex = new RegExp(`(?:case\\s+)?(?:class|object)\\s+(\\w+)\\s*(?:\\([^)]*\\))?\\s*extends\\s+${sealedName}`, 'g');
let match;
while ((match = implRegex.exec(content)) !== null) {
implementations.push(match[1]);
}
return implementations;
}
}
// Elixir Behaviour & Macro Analyzer
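// Extracts behaviours (modules declaring @callback specs, reported as interfaces), defmacro
// definitions, and defprotocol requirements.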
export class ElixirAnalyzer extends LanguageSpecificAnalyzer {
canHandle(filePath: string): boolean {
const ext = path.extname(filePath).toLowerCase();
return ext === '.ex' || ext === '.exs';
}
async analyze(content: string, filePath: string): Promise<LanguageSpecificFeatures> {
const features: LanguageSpecificFeatures = {
interfaces: [], // Behaviours in Elixir
macros: [],
protocols: []
};
// Analyze behaviours (Elixir's equivalent of interfaces): scan each module for @callback specs.
// Note: the lazy ([\s\S]*?)end pattern stops at the first `end` it finds, so a module containing
// nested do/end blocks is captured only up to that first nested `end`.
let match;
const moduleRegex = /defmodule\s+([\w.]+)\s+do([\s\S]*?)end/g;
while ((match = moduleRegex.exec(content)) !== null) {
const moduleName = match[1];
const moduleBody = match[2];
const callbacks: string[] = [];
const callbackRegex = /@callback\s+(\w+)/g;
let callbackMatch;
while ((callbackMatch = callbackRegex.exec(moduleBody)) !== null) {
callbacks.push(callbackMatch[1]);
}
if (callbacks.length > 0) {
features.interfaces!.push({
name: moduleName,
methods: callbacks,
properties: []
});
}
}
// Analyze macros
const macroRegex = /defmacro\s+(\w+)\s*(?:\([^)]*\))?\s+do/g;
while ((match = macroRegex.exec(content)) !== null) {
features.macros!.push({
name: match[1],
type: 'defmacro'
});
}
// Analyze protocols
const protocolRegex = /defprotocol\s+([\w.]+)\s+do([\s\S]*?)end/g;
while ((match = protocolRegex.exec(content)) !== null) {
const protocolName = match[1];
const protocolBody = match[2];
const functions = this.extractProtocolFunctions(protocolBody);
features.protocols!.push({
name: protocolName,
requirements: functions
});
}
return features;
}
private extractProtocolFunctions(body: string): string[] {
const functions: string[] = [];
const funcRegex = /def\s+(\w+)\s*\(/g;
let match;
while ((match = funcRegex.exec(body)) !== null) {
functions.push(match[1]);
}
return functions;
}
}
// Create a manager to handle all analyzers
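// Routes a file to the first registered analyzer whose canHandle() accepts its extension and
// returns that analyzer's features; files with unsupported extensions yield an empty object.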
export class LanguageSpecificFeatureManager {
private analyzers: LanguageSpecificAnalyzer[] = [];
constructor() {
this.registerAnalyzers();
}
private registerAnalyzers() {
this.analyzers.push(
new PythonDecoratorAnalyzer(),
new JavaAnnotationAnalyzer(),
new GoInterfaceAnalyzer(),
new RustTraitAnalyzer(),
new SwiftAnalyzer(),
new KotlinAnalyzer(),
new ScalaAnalyzer(),
new ElixirAnalyzer()
);
}
async analyzeFile(filePath: string): Promise<LanguageSpecificFeatures> {
const analyzer = this.analyzers.find(a => a.canHandle(filePath));
if (!analyzer) {
return {}; // Return empty features for unsupported languages
}
const content = await fs.readFile(filePath, 'utf-8');
return analyzer.analyze(content, filePath);
}
getAnalyzer(filePath: string): LanguageSpecificAnalyzer | null {
return this.analyzers.find(a => a.canHandle(filePath)) || null;
}
}
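// Example usage (a minimal sketch; the path below is a placeholder):
//
//   const manager = new LanguageSpecificFeatureManager();
//   const features = await manager.analyzeFile('src/models/user.py');
//   // For a Python source file, only features.decorators, features.dataClasses and
//   // features.enums are populated; the remaining buckets stay undefined.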