import fs from 'fs/promises';
/**
* A single declaration discovered in a source file by the symbol extractor.
*
* NOTE(review): the name shadows the built-in global `Symbol` type inside any
* module that declares or imports it; alias the import where the ES `Symbol`
* type is also needed.
*/
export interface Symbol {
/** Identifier captured from the declaration. */
name: string;
/** Kind of declaration the symbol represents. */
type: 'function' | 'class' | 'method' | 'interface' | 'type' | 'const' | 'variable' | 'enum';
/** 1-based line number of the declaration within the scanned content. */
line: number;
/** Trimmed text of the declaring source line; absent when the extractor does not record one. */
signature?: string;
/** Name of the class the symbol was attributed to, if any. */
parentClass?: string;
}
/**
 * Extracts symbol declarations (classes, functions, methods, ...) from source files.
 *
 * Parsing is line-oriented and regex-based so that many languages can be
 * supported without a real parser. The results are therefore heuristic:
 * declarations spanning several lines, or keywords inside string literals,
 * may be missed or misread. Class membership is tracked through indentation,
 * which assumes conventionally formatted code.
 */
export class SymbolExtractor {
  /**
   * Reads a file and extracts its symbols.
   *
   * @param filePath - Path of the file to read (decoded as UTF-8).
   * @param extension - File extension including the leading dot (e.g. `.ts`);
   *   selects the language-specific extractor. Matching is case-insensitive.
   * @returns The symbols found, in source order. Resolves to an empty array
   *   when the file cannot be read, extraction throws, or the extension is
   *   not supported.
   */
  async extractFromFile(filePath: string, extension: string): Promise<Symbol[]> {
    try {
      const content = await fs.readFile(filePath, 'utf-8');
      return this.extractSymbols(content, extension);
    } catch {
      // Deliberate best effort: a file that cannot be processed contributes no symbols.
      return [];
    }
  }

  /**
   * Splits the content into lines and dispatches to the extractor for the
   * given extension. Unknown extensions produce an empty result.
   */
  private extractSymbols(content: string, extension: string): Symbol[] {
    const symbols: Symbol[] = [];
    // Accept both LF and CRLF line endings.
    const lines = content.split(/\r?\n/);
    switch (extension.toLowerCase()) {
      case '.js':
      case '.jsx':
      case '.ts':
      case '.tsx':
      case '.mjs':
      case '.cjs':
        this.extractJavaScriptSymbols(lines, symbols);
        break;
      case '.py':
        this.extractPythonSymbols(lines, symbols);
        break;
      case '.java':
        this.extractJavaSymbols(lines, symbols);
        break;
      case '.go':
        this.extractGoSymbols(lines, symbols);
        break;
      case '.rs':
        this.extractRustSymbols(lines, symbols);
        break;
      case '.c':
      case '.cpp':
      case '.cc':
      case '.h':
      case '.hpp':
        this.extractCSymbols(lines, symbols);
        break;
      case '.php':
        this.extractPHPSymbols(lines, symbols);
        break;
      case '.rb':
        this.extractRubySymbols(lines, symbols);
        break;
      case '.cs':
        this.extractCSharpSymbols(lines, symbols);
        break;
    }
    return symbols;
  }

  /** Number of leading whitespace characters on a raw (untrimmed) line. */
  private indentOf(rawLine: string): number {
    return rawLine.length - rawLine.trimStart().length;
  }

  /** True when a trimmed line is a `//` comment or part of a block/doc comment. */
  private isCommentLine(trimmedLine: string): boolean {
    return (
      trimmedLine.startsWith('//') ||
      trimmedLine.startsWith('/*') ||
      trimmedLine.startsWith('*')
    );
  }

  /**
   * JavaScript / TypeScript: classes, interfaces, type aliases, enums,
   * functions, arrow functions bound to `const`, class methods and `const`
   * declarations.
   */
  private extractJavaScriptSymbols(lines: string[], symbols: Symbol[]): void {
    const classPattern = /^(?:export\s+)?(?:default\s+)?(?:abstract\s+)?class\s+(\w+)/;
    // Same-shaped declarations, tried in this order after the class check.
    const declarations = [
      { pattern: /^(?:export\s+)?interface\s+(\w+)/, type: 'interface' as const },
      { pattern: /^(?:export\s+)?type\s+(\w+)/, type: 'type' as const },
      { pattern: /^(?:export\s+)?enum\s+(\w+)/, type: 'enum' as const },
      {
        pattern: /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(/,
        type: 'function' as const
      },
      {
        pattern: /^(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/,
        type: 'function' as const
      }
    ];
    const methodPattern =
      /^(?:(?:public|private|protected|static|abstract|override|readonly|async|get|set)\s+)*(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*[:{]/;
    const constPattern = /^(?:export\s+)?const\s+(\w+)\s*=/;
    // Statements that look like `name(...) {` but are not method declarations.
    const controlKeywords = new Set([
      'if', 'for', 'while', 'switch', 'catch', 'with', 'function', 'return', 'do', 'else'
    ]);

    let currentClass: string | undefined;
    let classIndent = 0;

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      const classMatch = classPattern.exec(line);
      if (classMatch) {
        currentClass = classMatch[1];
        classIndent = this.indentOf(rawLine);
        symbols.push({ name: classMatch[1], type: 'class', line: lineNum, signature: line });
        continue;
      }

      let matched = false;
      for (const { pattern, type } of declarations) {
        const match = pattern.exec(line);
        if (match) {
          symbols.push({ name: match[1], type, line: lineNum, signature: line });
          matched = true;
          break;
        }
      }
      if (matched) {
        continue;
      }

      const methodMatch = methodPattern.exec(line);
      if (methodMatch && currentClass && !controlKeywords.has(methodMatch[1])) {
        symbols.push({
          name: methodMatch[1],
          type: 'method',
          line: lineNum,
          parentClass: currentClass,
          signature: line
        });
        continue;
      }

      // Only `const` bindings are recorded; no signature is stored for them.
      const constMatch = constPattern.exec(line);
      if (constMatch) {
        symbols.push({ name: constMatch[1], type: 'const', line: lineNum });
        continue;
      }

      // The class ends at a closing brace indented no deeper than the class
      // itself. Closing braces of methods are indented further and must not
      // end the class, otherwise every later method would be lost.
      if (currentClass && line === '}' && this.indentOf(rawLine) <= classIndent) {
        currentClass = undefined;
      }
    }
  }

  /** Python: top-level classes plus `def` / `async def` functions and methods. */
  private extractPythonSymbols(lines: string[], symbols: Symbol[]): void {
    const classPattern = /^class\s+(\w+)/;
    const defPattern = /^(\s*)(?:async\s+)?def\s+(\w+)\s*\(/;
    // Code or a decorator starting in column 0 means the class body is over.
    // Comments, blank lines and string fragments in column 0 are ignored.
    const topLevelCodePattern = /^[A-Za-z_@]/;

    let currentClass: string | undefined;

    for (const [index, line] of lines.entries()) {
      const lineNum = index + 1;

      const classMatch = classPattern.exec(line);
      if (classMatch) {
        currentClass = classMatch[1];
        symbols.push({
          name: classMatch[1],
          type: 'class',
          line: lineNum,
          signature: line.trim()
        });
        continue;
      }

      if (currentClass && topLevelCodePattern.test(line)) {
        currentClass = undefined;
      }

      const defMatch = defPattern.exec(line);
      if (defMatch) {
        const insideClass = defMatch[1].length > 0 && currentClass !== undefined;
        symbols.push({
          name: defMatch[2],
          type: insideClass ? 'method' : 'function',
          line: lineNum,
          parentClass: insideClass ? currentClass : undefined,
          signature: line.trim()
        });
      }
    }
  }

  /** Java: classes, interfaces and methods that carry an access modifier. */
  private extractJavaSymbols(lines: string[], symbols: Symbol[]): void {
    // Modifiers may precede the keyword in any combination, so only the
    // keyword itself (as a whole word) is required.
    const classPattern = /\bclass\s+(\w+)/;
    const interfacePattern = /\binterface\s+(\w+)/;
    const methodPattern =
      /(?:public|private|protected)\s+(?:static\s+)?(?:\w+<[^>]+>|\w+)\s+(\w+)\s*\(/;

    let currentClass: string | undefined;

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      // Javadoc such as "This class handles ..." must not produce symbols.
      if (this.isCommentLine(line)) {
        continue;
      }

      const classMatch = classPattern.exec(line);
      if (classMatch) {
        currentClass = classMatch[1];
        symbols.push({ name: classMatch[1], type: 'class', line: lineNum, signature: line });
        continue;
      }

      const interfaceMatch = interfacePattern.exec(line);
      if (interfaceMatch) {
        symbols.push({
          name: interfaceMatch[1],
          type: 'interface',
          line: lineNum,
          signature: line
        });
        continue;
      }

      const methodMatch = methodPattern.exec(line);
      if (methodMatch) {
        symbols.push({
          name: methodMatch[1],
          type: currentClass ? 'method' : 'function',
          line: lineNum,
          parentClass: currentClass,
          signature: line
        });
      }
    }
  }

  /** Go: functions (with or without a receiver) and struct / interface types. */
  private extractGoSymbols(lines: string[], symbols: Symbol[]): void {
    const funcPattern = /^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/;
    const typePattern = /^type\s+(\w+)\s+(struct|interface)/;

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      const funcMatch = funcPattern.exec(line);
      if (funcMatch) {
        symbols.push({ name: funcMatch[1], type: 'function', line: lineNum, signature: line });
        continue;
      }

      const typeMatch = typePattern.exec(line);
      if (typeMatch) {
        symbols.push({
          name: typeMatch[1],
          type: typeMatch[2] === 'interface' ? 'interface' : 'type',
          line: lineNum,
          signature: line
        });
      }
    }
  }

  /**
   * Rust: functions, structs (reported as `class`), enums and traits
   * (reported as `interface`). Restricted visibility such as `pub(crate)`,
   * the `const` / `async` / `unsafe` qualifiers and generic parameters are
   * accepted.
   */
  private extractRustSymbols(lines: string[], symbols: Symbol[]): void {
    const declarations = [
      {
        pattern: /^(?:pub(?:\([^)]*\))?\s+)?(?:(?:const|async|unsafe)\s+)*fn\s+(\w+)\s*[<(]/,
        type: 'function' as const
      },
      { pattern: /^(?:pub(?:\([^)]*\))?\s+)?struct\s+(\w+)/, type: 'class' as const },
      { pattern: /^(?:pub(?:\([^)]*\))?\s+)?enum\s+(\w+)/, type: 'enum' as const },
      { pattern: /^(?:pub(?:\([^)]*\))?\s+)?trait\s+(\w+)/, type: 'interface' as const }
    ];

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      for (const { pattern, type } of declarations) {
        const match = pattern.exec(line);
        if (match) {
          symbols.push({ name: match[1], type, line: lineNum, signature: line });
          break;
        }
      }
    }
  }

  /**
   * C / C++: function definitions and prototypes, plus struct / class
   * declarations (both reported as `class`).
   */
  private extractCSymbols(lines: string[], symbols: Symbol[]): void {
    // Group 1: leading type/qualifier tokens (may include `*` / `&`),
    // group 2: the function name, group 3: `{` (definition) or `;` (prototype).
    const funcPattern = /^((?:\w+[\s*&]+)*)(\w+)\s*\([^)]*\)\s*([{;])/;
    const structPattern = /^(?:typedef\s+)?(?:struct|class)\s+(\w+)/;
    // Statements that share the `name(...) {` / `name(...);` shape.
    const statementKeywords = new Set([
      'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'return', 'goto',
      'sizeof', 'throw', 'catch', 'delete', 'new'
    ]);

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      const funcMatch = funcPattern.exec(line);
      if (funcMatch) {
        const qualifiers = funcMatch[1].split(/[\s*&]+/).filter(Boolean);
        const name = funcMatch[2];
        const isStatement =
          statementKeywords.has(name) ||
          qualifiers.some((token) => statementKeywords.has(token));
        // `foo(x);` without any preceding type is a call, not a prototype.
        const isBareCall = funcMatch[3] === ';' && qualifiers.length === 0;
        if (!isStatement && !isBareCall) {
          symbols.push({ name, type: 'function', line: lineNum, signature: line });
          continue;
        }
      }

      const structMatch = structPattern.exec(line);
      if (structMatch) {
        symbols.push({ name: structMatch[1], type: 'class', line: lineNum, signature: line });
      }
    }
  }

  /** PHP: classes (including `abstract` / `final`) and functions / methods. */
  private extractPHPSymbols(lines: string[], symbols: Symbol[]): void {
    const classPattern = /^(?:(?:abstract|final)\s+)?class\s+(\w+)/;
    const funcPattern =
      /^(?:(?:public|private|protected|static|abstract|final)\s+)*function\s+(\w+)\s*\(/;

    let currentClass: string | undefined;
    let classIndent = 0;

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      const classMatch = classPattern.exec(line);
      if (classMatch) {
        currentClass = classMatch[1];
        classIndent = this.indentOf(rawLine);
        symbols.push({ name: classMatch[1], type: 'class', line: lineNum, signature: line });
        continue;
      }

      const funcMatch = funcPattern.exec(line);
      if (funcMatch) {
        symbols.push({
          name: funcMatch[1],
          type: currentClass ? 'method' : 'function',
          line: lineNum,
          parentClass: currentClass,
          signature: line
        });
        continue;
      }

      // A closing brace at (or left of) the class indentation ends the class,
      // so that functions declared afterwards are not reported as methods.
      if (currentClass && line === '}' && this.indentOf(rawLine) <= classIndent) {
        currentClass = undefined;
      }
    }
  }

  /** Ruby: classes and `def` methods, including `def self.name` class methods. */
  private extractRubySymbols(lines: string[], symbols: Symbol[]): void {
    const classPattern = /^class\s+(\w+)/;
    // Skip a `self.` receiver and keep a trailing `?`, `!` or `=` in the name.
    const methodPattern = /^def\s+(?:self\.)?(\w+[?!=]?)/;

    let currentClass: string | undefined;
    let classIndent = 0;

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      const classMatch = classPattern.exec(line);
      if (classMatch) {
        currentClass = classMatch[1];
        classIndent = this.indentOf(rawLine);
        symbols.push({ name: classMatch[1], type: 'class', line: lineNum, signature: line });
        continue;
      }

      const methodMatch = methodPattern.exec(line);
      if (methodMatch) {
        symbols.push({
          name: methodMatch[1],
          type: currentClass ? 'method' : 'function',
          line: lineNum,
          parentClass: currentClass,
          signature: line
        });
        continue;
      }

      // An `end` at (or left of) the class indentation closes the class.
      if (currentClass && line === 'end' && this.indentOf(rawLine) <= classIndent) {
        currentClass = undefined;
      }
    }
  }

  /** C#: classes and methods that carry an access modifier. */
  private extractCSharpSymbols(lines: string[], symbols: Symbol[]): void {
    // Modifiers may precede the keyword in any combination, so only the
    // keyword itself (as a whole word) is required.
    const classPattern = /\bclass\s+(\w+)/;
    const methodPattern =
      /(?:public|private|protected|internal)\s+(?:static\s+)?(?:async\s+)?(?:\w+<[^>]+>|\w+)\s+(\w+)\s*\(/;

    let currentClass: string | undefined;

    for (const [index, rawLine] of lines.entries()) {
      const line = rawLine.trim();
      const lineNum = index + 1;

      // XML doc comments such as "/// This class does ..." must not produce symbols.
      if (this.isCommentLine(line)) {
        continue;
      }

      const classMatch = classPattern.exec(line);
      if (classMatch) {
        currentClass = classMatch[1];
        symbols.push({ name: classMatch[1], type: 'class', line: lineNum, signature: line });
        continue;
      }

      const methodMatch = methodPattern.exec(line);
      if (methodMatch) {
        symbols.push({
          name: methodMatch[1],
          type: currentClass ? 'method' : 'function',
          line: lineNum,
          parentClass: currentClass,
          signature: line
        });
      }
    }
  }
}