// language-parsers-simple.ts (23.9 kB)
import { CodeElement, APIEndpoint } from './code-scanner.js';
import { spawn } from 'child_process';
/**
 * Contract implemented by each language-specific parser in this module.
 */
export interface LanguageParser {
  /** Human-readable parser name (for example `'Python'`). */
  name: string;

  /** File extensions handled by the parser, written without the leading dot. */
  extensions: string[];

  /** Optional capability flag; set to `true` by parsers that fill `apiEndpoints`. */
  supportsApiEndpoints?: boolean;

  /** Optional capability flag advertised by some parsers. */
  supportsFrameworkDetection?: boolean;

  /**
   * Parses one source file.
   *
   * @param content - Full text of the file.
   * @param filePath - Path of the file; copied into every reported element.
   * @returns The elements discovered in the file, grouped by category.
   */
  parseFile(content: string, filePath: string): Promise<LanguageParseResult>;
}
/**
 * Everything a parser extracted from a single file, grouped by category.
 * Categories a language has no concept of are left as empty arrays.
 */
export interface LanguageParseResult {
  // Callable and type-like definitions
  functions: CodeElement[];
  classes: CodeElement[];
  interfaces: CodeElement[];
  types: CodeElement[];
  enums: CodeElement[];

  // Module surface
  imports: CodeElement[];
  exports: CodeElement[];

  // Value-level declarations
  constants: CodeElement[];
  variables: CodeElement[];

  // HTTP routes detected in the file
  apiEndpoints: APIEndpoint[];
}
/**
 * Routes a source file to the language parser registered for its extension.
 */
export class MultiLanguageCodeScanner {
  /** Registered parsers keyed by lower-case extension (no leading dot). */
  private readonly parsers = new Map<string, LanguageParser>();

  constructor() {
    this.initializeParsers();
  }

  /** Registers the built-in parsers. */
  private initializeParsers(): void {
    // Register parsers based on your tech stack
    this.registerParser(new PythonParser());
    this.registerParser(new GoParser());
    this.registerParser(new YamlParser());
    this.registerParser(new BashParser());
  }

  /** Maps every extension declared by `parser` to that parser (last registration wins). */
  private registerParser(parser: LanguageParser): void {
    for (const extension of parser.extensions) {
      this.parsers.set(extension, parser);
    }
  }

  /**
   * Parses `content` with the parser registered for the extension of `filePath`.
   *
   * @param content - Full text of the file.
   * @param filePath - Path used to pick the parser and to label the results.
   * @returns The parser's result, or an empty result when no parser is registered.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const parser = this.parsers.get(this.getFileExtension(filePath));
    if (parser) {
      return await parser.parseFile(content, filePath);
    }
    // Return empty result for unsupported files
    return this.getEmptyResult();
  }

  /**
   * Returns the lower-cased extension of the final path segment, or `''` when
   * that segment has none.
   *
   * Only the file name is inspected, so dots in directory names are ignored and
   * an extensionless file called `sh` or `go` is not mistaken for a script of
   * that language. A name whose only dot is the leading one (`.bashrc`) is
   * treated as having no extension.
   */
  private getFileExtension(filePath: string): string {
    const fileName = filePath.split(/[\\/]/).pop() ?? '';
    const dotIndex = fileName.lastIndexOf('.');
    if (dotIndex <= 0 || dotIndex === fileName.length - 1) {
      return '';
    }
    return fileName.slice(dotIndex + 1).toLowerCase();
  }

  /** Builds a result object with every category empty. */
  private getEmptyResult(): LanguageParseResult {
    return {
      functions: [],
      classes: [],
      interfaces: [],
      types: [],
      enums: [],
      exports: [],
      imports: [],
      apiEndpoints: [],
      constants: [],
      variables: [],
    };
  }

  /** Lists every extension that currently has a parser. */
  getSupportedExtensions(): string[] {
    return Array.from(this.parsers.keys());
  }

  /** Lists each registered extension together with the name of its parser. */
  getParserInfo(): { extension: string; parser: string }[] {
    return Array.from(this.parsers.entries()).map(([ext, parser]) => ({
      extension: ext,
      parser: parser.name,
    }));
  }
}
/** One record emitted by the embedded Python AST helper script. */
interface PythonAstEntry {
  name: string;
  line: number;
  exported?: boolean;
  is_async?: boolean;
  has_docstring?: boolean;
  docstring?: string | null;
}

/** JSON document printed on stdout by the embedded Python AST helper script. */
interface PythonAstResult {
  functions?: PythonAstEntry[];
  classes?: PythonAstEntry[];
  imports?: PythonAstEntry[];
  constants?: PythonAstEntry[];
  variables?: PythonAstEntry[];
  error?: string;
}

/** Maximum time the Python helper process may run before it is killed. */
const PYTHON_AST_TIMEOUT_MS = 5000;

/**
 * Helper script run with `python3 -c`. It reads the source from stdin, walks
 * the AST and prints a JSON summary on stdout. It is kept as an explicit list
 * of lines so that Python's significant indentation cannot be altered by a
 * code formatter touching this file.
 */
const PYTHON_AST_SCRIPT = [
  'import ast',
  'import json',
  'import sys',
  '',
  'try:',
  '    tree = ast.parse(sys.stdin.read())',
  '    result = {"functions": [], "classes": [], "imports": [], "constants": [], "variables": []}',
  '    for node in ast.walk(tree):',
  '        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):',
  '            doc = ast.get_docstring(node)',
  '            result["functions"].append({',
  '                "name": node.name,',
  '                "line": node.lineno,',
  '                "has_docstring": doc is not None,',
  '                "docstring": doc,',
  '                "is_async": isinstance(node, ast.AsyncFunctionDef),',
  '                "exported": not node.name.startswith("_"),',
  '            })',
  '        elif isinstance(node, ast.ClassDef):',
  '            doc = ast.get_docstring(node)',
  '            result["classes"].append({',
  '                "name": node.name,',
  '                "line": node.lineno,',
  '                "has_docstring": doc is not None,',
  '                "docstring": doc,',
  '                "exported": not node.name.startswith("_"),',
  '            })',
  '        elif isinstance(node, ast.Import):',
  '            for alias in node.names:',
  '                result["imports"].append({"name": alias.name, "line": node.lineno})',
  '        elif isinstance(node, ast.ImportFrom):',
  '            result["imports"].append({"name": node.module or "relative", "line": node.lineno})',
  '        elif isinstance(node, ast.Assign):',
  '            for target in node.targets:',
  '                if isinstance(target, ast.Name):',
  '                    bucket = "constants" if target.id.isupper() else "variables"',
  '                    result[bucket].append({',
  '                        "name": target.id,',
  '                        "line": node.lineno,',
  '                        "exported": not target.id.startswith("_"),',
  '                    })',
  '    print(json.dumps(result))',
  'except Exception as e:',
  '    print(json.dumps({"error": str(e)}), file=sys.stderr)',
  '    sys.exit(1)',
].join('\n');

/**
 * Python parser. Tries to obtain an AST summary from a `python3` subprocess and
 * falls back to line-based regular expressions when that is not possible.
 * Route decorators and Django URL patterns are then collected as API endpoints.
 */
export class PythonParser implements LanguageParser {
  extensions = ['py', 'pyi', 'pyx', 'pxd'];
  name = 'Python';
  supportsApiEndpoints = true;
  supportsFrameworkDetection = true;

  /**
   * Parses a Python source file.
   *
   * @param content - Full text of the file.
   * @param filePath - Path copied into every reported element.
   * @returns Functions, classes, imports, constants, variables and API endpoints found.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    let result = this.createEmptyResult();
    let usedAst = false;

    try {
      // Try subprocess-based AST parsing first
      const astResult = await this.parseWithPythonAST(content, filePath);
      if (astResult) {
        this.mergePythonASTResults(astResult, result, filePath);
        usedAst = true;
      }
    } catch (error) {
      console.warn(`Failed to parse Python file ${filePath}:`, error);
      // Drop anything merged before the failure so the regex pass below
      // cannot produce duplicates.
      result = this.createEmptyResult();
    }

    if (!usedAst) {
      // Fall back to regex-based parsing
      this.parseWithRegex(content, result, filePath);
    }

    // Look for Flask/FastAPI/Django endpoints (runs for both parsing strategies)
    this.findPythonApiEndpoints(content, result, filePath);
    return result;
  }

  /** Builds a result object with every category empty. */
  private createEmptyResult(): LanguageParseResult {
    return {
      functions: [],
      classes: [],
      interfaces: [],
      types: [],
      enums: [],
      exports: [],
      imports: [],
      apiEndpoints: [],
      constants: [],
      variables: [],
    };
  }

  /**
   * Runs the embedded helper script under `python3` and returns its JSON output.
   *
   * @param content - Python source, sent to the child on stdin.
   * @param _filePath - Unused; kept so the signature stays stable.
   * @returns The parsed summary, or `null` when the interpreter is missing,
   *   exits with a non-zero code, prints no or invalid JSON, or exceeds the timeout.
   */
  private parseWithPythonAST(content: string, _filePath: string): Promise<PythonAstResult | null> {
    return new Promise((resolve) => {
      let settled = false;
      let timer: ReturnType<typeof setTimeout> | undefined;

      // Resolve at most once and always release the timeout timer, so a
      // finished parse does not keep the event loop alive.
      const finish = (value: PythonAstResult | null): void => {
        if (settled) return;
        settled = true;
        if (timer !== undefined) clearTimeout(timer);
        resolve(value);
      };

      const child = spawn('python3', ['-c', PYTHON_AST_SCRIPT], {
        stdio: ['pipe', 'pipe', 'pipe'],
        // Make the child decode stdin as UTF-8 regardless of the host locale.
        env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
      });

      timer = setTimeout(() => {
        child.kill();
        finish(null);
      }, PYTHON_AST_TIMEOUT_MS);

      let output = '';
      let errorOutput = '';
      // Decode at the stream level so multi-byte characters split across
      // chunks are not corrupted.
      child.stdout.setEncoding('utf8');
      child.stderr.setEncoding('utf8');
      child.stdout.on('data', (chunk: string) => {
        output += chunk;
      });
      child.stderr.on('data', (chunk: string) => {
        errorOutput += chunk;
      });

      child.on('error', () => {
        finish(null); // Python not available or failed
      });

      // The child may exit before reading all input; without a listener the
      // resulting stream error would be thrown as an uncaught exception.
      child.stdin.on('error', () => {
        /* outcome is decided by the 'close' / 'error' handlers */
      });

      child.on('close', (code) => {
        if (code === 0 && output.trim()) {
          try {
            const parsed = JSON.parse(output.trim()) as PythonAstResult | null;
            if (parsed && typeof parsed === 'object' && !parsed.error) {
              finish(parsed);
              return;
            }
          } catch (e) {
            // JSON parsing failed
            console.warn('Failed to parse Python AST output:', e);
          }
        }
        if (errorOutput) {
          console.warn('Python AST parsing errors:', errorOutput);
        }
        finish(null); // Fall back to regex parsing
      });

      // Send content via stdin
      child.stdin.end(content);
    });
  }

  /**
   * Converts the helper script's records into `CodeElement`s and appends them to `result`.
   * The script reports no column information, so `column` is always 0.
   */
  private mergePythonASTResults(
    astResult: PythonAstResult,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    for (const func of astResult.functions ?? []) {
      result.functions.push({
        name: func.name,
        type: 'function',
        filePath,
        line: func.line,
        column: 0,
        exported: func.exported ?? !func.name.startsWith('_'),
        isAsync: func.is_async ?? false,
        hasJSDoc: func.has_docstring ?? false,
        jsDocDescription: func.docstring || undefined,
      });
    }

    for (const cls of astResult.classes ?? []) {
      result.classes.push({
        name: cls.name,
        type: 'class',
        filePath,
        line: cls.line,
        column: 0,
        exported: cls.exported ?? !cls.name.startsWith('_'),
        hasJSDoc: cls.has_docstring ?? false,
        jsDocDescription: cls.docstring || undefined,
      });
    }

    for (const imp of astResult.imports ?? []) {
      result.imports.push({
        name: imp.name,
        type: 'import',
        filePath,
        line: imp.line,
        column: 0,
        exported: false,
      });
    }

    for (const constant of astResult.constants ?? []) {
      result.constants.push({
        name: constant.name,
        type: 'variable',
        filePath,
        line: constant.line,
        column: 0,
        exported: constant.exported ?? !constant.name.startsWith('_'),
        hasJSDoc: false,
      });
    }

    for (const variable of astResult.variables ?? []) {
      result.variables.push({
        name: variable.name,
        type: 'variable',
        filePath,
        line: variable.line,
        column: 0,
        exported: variable.exported ?? !variable.name.startsWith('_'),
        hasJSDoc: false,
      });
    }
  }

  /**
   * Line-based fallback used when no AST summary is available. Recognises
   * `def` / `async def`, `class`, `import` / `from … import`, and upper-case
   * assignments. Naming rules mirror the AST helper: names starting with `_`
   * are not exported, and relative imports are reported without leading dots
   * (or as `'relative'` when nothing remains).
   */
  private parseWithRegex(content: string, result: LanguageParseResult, filePath: string): void {
    const functionPattern = /^\s*(async\s+)?def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/;
    const classPattern = /^\s*class\s+([a-zA-Z_][a-zA-Z0-9_]*)/;
    const importPattern = /^\s*(?:from\s+(\S+)\s+)?import\s+(.+)/;
    // `(?!=)` keeps comparisons such as `FLAG == 1` from being read as assignments.
    const constantPattern = /^\s*([A-Z_][A-Z0-9_]*)\s*=(?!=)/;

    const lines = content.split('\n');
    lines.forEach((line, index) => {
      const lineNum = index + 1;

      // Function definitions
      const funcMatch = line.match(functionPattern);
      if (funcMatch) {
        const funcName = funcMatch[2];
        result.functions.push({
          name: funcName,
          type: 'function',
          filePath,
          line: lineNum,
          column: 0,
          exported: !funcName.startsWith('_'),
          isAsync: !!funcMatch[1],
          hasJSDoc: this.hasDocstringAfterLine(lines, index),
        });
      }

      // Class definitions
      const classMatch = line.match(classPattern);
      if (classMatch) {
        const className = classMatch[1];
        result.classes.push({
          name: className,
          type: 'class',
          filePath,
          line: lineNum,
          column: 0,
          exported: !className.startsWith('_'),
          hasJSDoc: this.hasDocstringAfterLine(lines, index),
        });
      }

      // Import statements
      const importMatch = line.match(importPattern);
      if (importMatch) {
        const modules =
          importMatch[1] !== undefined
            ? [importMatch[1].replace(/^\.+/, '') || 'relative']
            : this.splitImportList(importMatch[2]);
        for (const moduleName of modules) {
          result.imports.push({
            name: moduleName,
            type: 'import',
            filePath,
            line: lineNum,
            column: 0,
            exported: false,
          });
        }
      }

      // Constants: all-caps names containing at least one letter
      const assignMatch = line.match(constantPattern);
      if (assignMatch && /[A-Z]/.test(assignMatch[1])) {
        result.constants.push({
          name: assignMatch[1],
          type: 'variable',
          filePath,
          line: lineNum,
          column: 0,
          exported: !assignMatch[1].startsWith('_'),
          hasJSDoc: false,
        });
      }
    });
  }

  /**
   * Splits the operand of a plain `import a, b as c  # note` statement into
   * module names, dropping trailing comments and `as` aliases.
   */
  private splitImportList(operand: string): string[] {
    return operand
      .replace(/\s*#.*$/, '')
      .split(',')
      .map((part) => part.split(/\s+as\s+/)[0].trim())
      .filter((moduleName) => moduleName.length > 0);
  }

  /**
   * Heuristic docstring check: true when one of the two lines following
   * `lineIndex` starts with a triple-quoted string (optionally `r`/`u` prefixed).
   */
  private hasDocstringAfterLine(lines: string[], lineIndex: number): boolean {
    const end = Math.min(lineIndex + 3, lines.length);
    for (let i = lineIndex + 1; i < end; i++) {
      if (/^[rRuU]?("""|''')/.test(lines[i].trim())) {
        return true;
      }
    }
    return false;
  }

  /**
   * Collects HTTP endpoints declared through Flask/FastAPI-style decorators,
   * `router.<method>(...)` calls and Django `path()` / `re_path()` / `url()`
   * entries. Endpoints are appended in source order. Declarations that carry
   * no HTTP verb (`route`, Django patterns) are reported with method `'ALL'`.
   */
  private findPythonApiEndpoints(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    // Group 1 = verb (or `route`), group 2 = path. A single decorator pattern
    // covers Flask and FastAPI so `@app.get(...)` is reported once.
    const verbPatterns = [
      /@(?:app|bp)\.(route|get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
      /router\.(get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
    ];
    // Group 1 = path; no verb is available. `\b` avoids matching e.g. `xpath(`.
    const pathOnlyPatterns = [
      /\b(?:re_)?path\s*\(\s*r?['"]([^'"]+)['"]/g,
      /\burl\s*\(\s*r?['"]([^'"]+)['"]/g,
    ];

    const found: { index: number; method: APIEndpoint['method']; path: string }[] = [];

    for (const pattern of verbPatterns) {
      for (let m = pattern.exec(content); m !== null; m = pattern.exec(content)) {
        const method =
          m[1] === 'route' ? 'ALL' : (m[1].toUpperCase() as APIEndpoint['method']);
        found.push({ index: m.index, method, path: m[2] });
      }
    }
    for (const pattern of pathOnlyPatterns) {
      for (let m = pattern.exec(content); m !== null; m = pattern.exec(content)) {
        found.push({ index: m.index, method: 'ALL', path: m[1] });
      }
    }

    found.sort((a, b) => a.index - b.index);

    for (const endpoint of found) {
      result.apiEndpoints.push({
        method: endpoint.method,
        path: endpoint.path,
        filePath,
        // 1-based line of the match start
        line: content.substring(0, endpoint.index).split('\n').length,
        hasDocumentation: this.hasEndpointDocumentation(content, endpoint.index),
      });
    }
  }

  /**
   * Heuristic: true when any of the last five line fragments before
   * `matchIndex` (the four preceding lines plus the text before the match on
   * its own line) starts with a triple quote or `#`.
   */
  private hasEndpointDocumentation(content: string, matchIndex: number): boolean {
    const precedingLines = content.substring(0, matchIndex).split('\n');
    return precedingLines.slice(Math.max(0, precedingLines.length - 5)).some((raw) => {
      const line = raw.trim();
      return line.startsWith('"""') || line.startsWith("'''") || line.startsWith('#');
    });
  }
}
/**
 * Go parser (regex-based). Scans the file line by line for function, struct,
 * interface, import, const and var declarations, then looks for route
 * registrations of common web frameworks.
 */
export class GoParser implements LanguageParser {
  extensions = ['go'];
  name = 'Go';
  supportsApiEndpoints = true;

  /**
   * Parses a Go source file.
   *
   * @param content - Full text of the file.
   * @param filePath - Path copied into every reported element.
   * @returns Functions, structs (as classes), interfaces, imports, constants,
   *   variables and API endpoints found.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result: LanguageParseResult = {
      functions: [],
      classes: [],
      interfaces: [],
      types: [],
      enums: [],
      exports: [],
      imports: [],
      apiEndpoints: [],
      constants: [],
      variables: [],
    };

    // `[(\[]` after the name also accepts generic functions: `func Map[T any](...)`.
    const funcPattern = /^\s*func\s+(?:\([^)]*\)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*[(\[]/;
    const typePattern =
      /^\s*type\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\[[^\]]*\])?\s+(struct|interface)/;
    const declPattern = /^\s*(const|var)\s+([a-zA-Z_][a-zA-Z0-9_]*)/;
    // An import spec is an optional alias (identifier, `_` or `.`) followed by a quoted path.
    const importSpecPattern = /^\s*(?:(?:[A-Za-z_][A-Za-z0-9_]*|\.)\s+)?"([^"]+)"/;
    const singleImportPattern =
      /^\s*import\b\s*(?:(?:[A-Za-z_][A-Za-z0-9_]*|\.)\s+)?"([^"]+)"/;
    const importBlockStart = /^\s*import\s*\((.*)$/;

    const addImport = (importPath: string, line: number): void => {
      result.imports.push({
        name: importPath,
        type: 'import',
        filePath,
        line,
        column: 0,
        exported: false,
      });
    };

    const lines = content.split('\n');
    // Quoted paths only count as imports inside `import ( ... )`; otherwise any
    // line starting with a string literal would be reported.
    let inImportBlock = false;

    lines.forEach((line, index) => {
      const lineNum = index + 1;

      // Import declarations
      if (inImportBlock) {
        if (/^\s*\)/.test(line)) {
          inImportBlock = false;
          return;
        }
        const spec = line.match(importSpecPattern);
        if (spec) addImport(spec[1], lineNum);
        return;
      }
      const blockStart = line.match(importBlockStart);
      if (blockStart) {
        const rest = blockStart[1].replace(/\/\/.*$/, '');
        const quoted = /"([^"]+)"/g;
        for (let m = quoted.exec(rest); m !== null; m = quoted.exec(rest)) {
          addImport(m[1], lineNum);
        }
        // `import ("fmt")` opens and closes the block on one line.
        inImportBlock = !rest.includes(')');
        return;
      }
      const singleImport = line.match(singleImportPattern);
      if (singleImport) {
        addImport(singleImport[1], lineNum);
        return;
      }

      // Function declarations
      const funcMatch = line.match(funcPattern);
      if (funcMatch) {
        const funcName = funcMatch[1];
        result.functions.push({
          name: funcName,
          type: 'function',
          filePath,
          line: lineNum,
          column: 0,
          exported: this.isGoExported(funcName),
          hasJSDoc: this.hasGoDocComment(lines, index),
        });
      }

      // Type declarations: structs are reported as classes, interfaces as interfaces
      const typeMatch = line.match(typePattern);
      if (typeMatch) {
        const typeName = typeMatch[1];
        const isStruct = typeMatch[2] === 'struct';
        const element: CodeElement = {
          name: typeName,
          type: isStruct ? 'class' : 'interface',
          filePath,
          line: lineNum,
          column: 0,
          exported: this.isGoExported(typeName),
          hasJSDoc: this.hasGoDocComment(lines, index),
        };
        (isStruct ? result.classes : result.interfaces).push(element);
      }

      // Constants and variables
      const declMatch = line.match(declPattern);
      if (declMatch) {
        const varName = declMatch[2];
        const element: CodeElement = {
          name: varName,
          type: 'variable',
          filePath,
          line: lineNum,
          column: 0,
          exported: this.isGoExported(varName),
          hasJSDoc: this.hasGoDocComment(lines, index),
        };
        (declMatch[1] === 'const' ? result.constants : result.variables).push(element);
      }
    });

    // Find Go API endpoints
    this.findGoApiEndpoints(content, result, filePath);
    return result;
  }

  /**
   * Go exports an identifier only when its first character is an upper-case
   * letter. `_name` is therefore not exported, even though `_` is unchanged by
   * `toUpperCase()`.
   */
  private isGoExported(name: string): boolean {
    return /^\p{Lu}/u.test(name);
  }

  /** True when the line directly above `lineIndex` is a `//` comment. */
  private hasGoDocComment(lines: string[], lineIndex: number): boolean {
    return lineIndex > 0 && lines[lineIndex - 1].trim().startsWith('//');
  }

  /**
   * Collects route registrations: `.GET("/x", …)`-style calls (upper-case or
   * capitalised verb) and `.HandleFunc("/x", …)` calls, which covers both
   * router objects and `http.HandleFunc`. `HandleFunc` carries no verb and is
   * reported as `'ALL'`. Endpoints are appended in source order.
   */
  private findGoApiEndpoints(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    // Group 1 = verb, group 2 = path.
    const verbPattern =
      /\.(GET|POST|PUT|DELETE|PATCH|Get|Post|Put|Delete|Patch)\s*\(\s*"([^"]+)"/g;
    // Group 1 = path. One pattern is enough: `http.HandleFunc` also contains
    // `.HandleFunc`, so a separate standard-library pattern would duplicate it.
    const handleFuncPattern = /\.HandleFunc\s*\(\s*"([^"]+)"/g;

    const found: { index: number; method: APIEndpoint['method']; path: string }[] = [];

    for (let m = verbPattern.exec(content); m !== null; m = verbPattern.exec(content)) {
      found.push({
        index: m.index,
        method: m[1].toUpperCase() as APIEndpoint['method'],
        path: m[2],
      });
    }
    for (
      let m = handleFuncPattern.exec(content);
      m !== null;
      m = handleFuncPattern.exec(content)
    ) {
      found.push({ index: m.index, method: 'ALL', path: m[1] });
    }

    found.sort((a, b) => a.index - b.index);

    for (const endpoint of found) {
      result.apiEndpoints.push({
        method: endpoint.method,
        path: endpoint.path,
        filePath,
        // 1-based line of the match start
        line: content.substring(0, endpoint.index).split('\n').length,
        hasDocumentation: this.hasEndpointDocumentation(content, endpoint.index),
      });
    }
  }

  /**
   * Heuristic: true when any of the last five line fragments before
   * `matchIndex` (the four preceding lines plus the text before the match on
   * its own line) starts with `//` or `/*`.
   */
  private hasEndpointDocumentation(content: string, matchIndex: number): boolean {
    const precedingLines = content.substring(0, matchIndex).split('\n');
    return precedingLines.slice(Math.max(0, precedingLines.length - 5)).some((raw) => {
      const line = raw.trim();
      return line.startsWith('//') || line.startsWith('/*');
    });
  }
}
/**
 * YAML parser. It does not build a YAML document model; it scans lines for
 * Kubernetes-style resources (`apiVersion` + `kind`), Docker Compose services
 * and GitHub Actions jobs.
 */
export class YamlParser implements LanguageParser {
  extensions = ['yml', 'yaml'];
  name = 'YAML';
  supportsFrameworkDetection = true;

  /**
   * Parses a YAML file.
   *
   * @param content - Full text of the file.
   * @param filePath - Path copied into every reported element; also used to
   *   recognise GitHub workflow files.
   * @returns Resources and services in `types`, workflow jobs in `functions`.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result: LanguageParseResult = {
      functions: [],
      classes: [],
      interfaces: [],
      types: [],
      enums: [],
      exports: [],
      imports: [],
      apiEndpoints: [],
      constants: [],
      variables: [],
    };
    this.identifyKubernetesResources(content, result, filePath);
    this.identifyDockerComposeServices(content, result, filePath);
    this.identifyGitHubActions(content, result, filePath);
    return result;
  }

  /**
   * Reports one `"<kind> (<apiVersion>)"` type per YAML document that declares
   * both keys at the top level, at the line of its `kind:` key.
   *
   * Only column-0 keys are considered, so nested `kind:` fields (for example
   * under `roleRef`) are not mistaken for resources. State is reset at each
   * `---` separator so values cannot leak between documents. The two keys may
   * appear in either order.
   */
  private identifyKubernetesResources(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    let apiVersion = '';
    let kind = '';
    let kindLine = 0;

    // Emits the pending resource (if complete) and clears per-document state.
    const flushDocument = (): void => {
      if (apiVersion && kind) {
        result.types.push({
          name: `${kind} (${apiVersion})`,
          type: 'type',
          filePath,
          line: kindLine,
          column: 0,
          exported: true,
          hasJSDoc: false,
        });
      }
      apiVersion = '';
      kind = '';
      kindLine = 0;
    };

    content.split(/\r?\n/).forEach((line, index) => {
      if (/^---(?:\s|$)/.test(line)) {
        flushDocument();
        return;
      }
      const apiMatch = line.match(/^apiVersion:\s*(.+)/);
      if (apiMatch) {
        apiVersion = this.cleanScalar(apiMatch[1]);
        return;
      }
      const kindMatch = line.match(/^kind:\s*(.+)/);
      if (kindMatch) {
        kind = this.cleanScalar(kindMatch[1]);
        kindLine = index + 1;
      }
    });
    flushDocument();
  }

  /** Reports each direct child key of a top-level `services:` mapping as `service: <name>`. */
  private identifyDockerComposeServices(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    for (const service of this.collectSectionKeys(content, 'services')) {
      result.types.push({
        name: `service: ${service.name}`,
        type: 'type',
        filePath,
        line: service.line,
        column: 0,
        exported: true,
        hasJSDoc: false,
      });
    }
  }

  /**
   * Reports each direct child key of a top-level `jobs:` mapping as
   * `job: <name>`. Only files under `.github/workflows/` are considered;
   * both `/` and `\` path separators are accepted.
   */
  private identifyGitHubActions(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    if (!filePath.replace(/\\/g, '/').includes('.github/workflows/')) return;
    for (const job of this.collectSectionKeys(content, 'jobs')) {
      result.functions.push({
        name: `job: ${job.name}`,
        type: 'function',
        filePath,
        line: job.line,
        column: 0,
        exported: true,
        hasJSDoc: false,
      });
    }
  }

  /**
   * Returns the keys that sit directly under the top-level mapping `section`.
   *
   * The indentation of the first nested line fixes the child level; deeper
   * keys such as `ports:` or `steps:` are ignored. The section ends at the
   * next line that starts in column 0. Blank and comment-only lines are skipped.
   */
  private collectSectionKeys(
    content: string,
    section: string,
  ): { name: string; line: number }[] {
    const header = new RegExp(`^${section}:\\s*(?:#.*)?$`);
    const keyPattern = /^\s+([a-zA-Z0-9_.-]+):\s*(?:#.*)?$/;
    const keys: { name: string; line: number }[] = [];
    let inSection = false;
    let childIndent = -1; // unknown until the first nested line is seen

    content.split(/\r?\n/).forEach((line, index) => {
      if (header.test(line)) {
        inSection = true;
        childIndent = -1;
        return;
      }
      if (!inSection) return;
      if (/^\s*(?:#.*)?$/.test(line)) return; // blank or comment-only
      if (/^\S/.test(line)) {
        inSection = false; // Left the section
        return;
      }
      const indent = line.length - line.trimStart().length;
      if (childIndent === -1) childIndent = indent;
      if (indent !== childIndent) return;
      const keyMatch = line.match(keyPattern);
      if (keyMatch) {
        keys.push({ name: keyMatch[1], line: index + 1 });
      }
    });
    return keys;
  }

  /** Strips a trailing ` # comment` and one pair of surrounding quotes from a scalar. */
  private cleanScalar(raw: string): string {
    const withoutComment = raw.replace(/\s+#.*$/, '').trim();
    const quoted = withoutComment.match(/^(['"])(.*)\1$/);
    return quoted ? quoted[2] : withoutComment;
  }
}
/**
 * Shell-script parser (regex-based). Scans line by line for function
 * definitions and variable assignments.
 */
export class BashParser implements LanguageParser {
  extensions = ['sh', 'bash', 'zsh'];
  name = 'Bash';

  /**
   * Parses a shell script.
   *
   * @param content - Full text of the script.
   * @param filePath - Path copied into every reported element.
   * @returns Functions, plus assignments split into `constants` (all-caps
   *   names, or anything declared `readonly`) and `variables` (everything else).
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result: LanguageParseResult = {
      functions: [],
      classes: [],
      interfaces: [],
      types: [],
      enums: [],
      exports: [],
      imports: [],
      apiEndpoints: [],
      constants: [],
      variables: [],
    };

    // Accepts `name() {`, `function name() {` and `function name {`.
    // Group 1 = name after the `function` keyword, group 2 = bare `name()` form.
    const functionPattern =
      /^\s*(?:function\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\(\s*\))?|([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*\))/;
    // No whitespace is allowed before `=`: in shell, `NAME = value` runs a
    // command called NAME rather than assigning. Any letter case is accepted so
    // that non-constant variables are reported too.
    const assignmentPattern = /^\s*(?:(export|readonly)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)=/;

    const lines = content.split('\n');
    lines.forEach((line, index) => {
      const lineNum = index + 1;

      // Function definitions
      const funcMatch = line.match(functionPattern);
      if (funcMatch) {
        result.functions.push({
          name: funcMatch[1] ?? funcMatch[2],
          type: 'function',
          filePath,
          line: lineNum,
          column: 0,
          exported: true, // Bash functions are generally available in scope
          hasJSDoc: this.hasBashDocComment(lines, index),
        });
      }

      // Variable assignments
      const varMatch = line.match(assignmentPattern);
      if (varMatch) {
        const varName = varMatch[2];
        const isConstant =
          varMatch[1] === 'readonly' ||
          (/[A-Z]/.test(varName) && varName === varName.toUpperCase());
        const element: CodeElement = {
          name: varName,
          type: 'variable',
          filePath,
          line: lineNum,
          column: 0,
          exported: true,
          hasJSDoc: this.hasBashDocComment(lines, index),
        };
        (isConstant ? result.constants : result.variables).push(element);
      }
    });

    return result;
  }

  /**
   * True when the line directly above `lineIndex` is a `#` comment. A shebang
   * (`#!…`) names the interpreter and is not treated as documentation.
   */
  private hasBashDocComment(lines: string[], lineIndex: number): boolean {
    if (lineIndex <= 0) {
      return false;
    }
    const prevLine = lines[lineIndex - 1].trim();
    return prevLine.startsWith('#') && !prevLine.startsWith('#!');
  }
}