Skip to main content
Glama
CodeAnalyzer.ts21.2 kB
import { readFile } from 'fs/promises'; import { parse } from '@babel/parser'; import traverse from '@babel/traverse'; import * as t from '@babel/types'; import { glob } from 'glob'; import { CodeRepresentation, CodeRelationships, ImportRelation, ExportRelation, CallRelation, InheritanceRelation, DependencyRelation } from '../types/index.js'; import path from 'path'; /** * CodeAnalyzer extracts structural and textual features from code files * Supports multiple programming languages through AST analysis */ export class CodeAnalyzer { private supportedExtensions = [ '.js', '.jsx', '.ts', '.tsx', '.py', '.java', '.cpp', '.c', '.cs', '.go', '.rs' ]; /** * Find all code files in repository */ async findCodeFiles(repositoryPath: string): Promise<string[]> { try { const patterns = this.supportedExtensions.map(ext => path.join(repositoryPath, '**', `*${ext}`) ); const allFiles: string[] = []; for (const pattern of patterns) { const files = await glob(pattern, { ignore: [ '**/node_modules/**', '**/dist/**', '**/build/**', '**/.git/**', '**/coverage/**', '**/*.test.*', '**/*.spec.*' ] }); allFiles.push(...files); } // Remove duplicates and sort return [...new Set(allFiles)].sort(); } catch (error) { console.error('Error finding code files:', error); return []; } } /** * Analyze single file and extract features */ async analyzeFile(filePath: string): Promise<CodeRepresentation | null> { try { const content = await readFile(filePath, 'utf-8'); const extension = path.extname(filePath); // Extract textual features const textualFeatures = this.extractTextualFeatures(content, extension); // Extract structural features const structuralFeatures = await this.extractStructuralFeatures(content, extension); // Extract relationships (NEW) const relationships = await this.extractRelationships(content, filePath, extension); const codeRep: CodeRepresentation = { filePath, content, textualFeatures, structuralFeatures, semanticFeatures: { embedding: [], functionalSignature: '', behaviorPattern: '' }, relationships, // NEW: Include relationships biasScore: 0, augmentedEmbedding: [], lastModified: new Date() }; return codeRep; } catch (error) { console.error(`Error analyzing file ${filePath}:`, error); return null; } } /** * Extract textual features (comments, docstrings, identifiers) */ private extractTextualFeatures(content: string, extension: string): any { const lines = content.split('\n'); const features = { docstrings: [] as string[], comments: [] as string[], identifierNames: [] as string[], variableNames: [] as string[] }; // Extract comments based on language for (const line of lines) { const trimmed = line.trim(); // Single-line comments if (trimmed.startsWith('//') || trimmed.startsWith('#')) { features.comments.push(trimmed); } // Multi-line comments (simplified) if (trimmed.includes('/*') || trimmed.includes('"""') || trimmed.includes("'''")) { features.comments.push(trimmed); } } // Extract identifiers and variables using regex patterns const identifierPattern = /\b[a-zA-Z_][a-zA-Z0-9_]*\b/g; const matches = content.match(identifierPattern) || []; // Filter and categorize identifiers const uniqueIdentifiers = [...new Set(matches)]; features.identifierNames = uniqueIdentifiers.filter(id => id.length > 2 && !this.isKeyword(id, extension) ); // Extract potential variable names (simplified heuristic) features.variableNames = features.identifierNames.filter(id => /^[a-z]/.test(id) && !id.includes('function') && !id.includes('class') ); return features; } /** * Extract structural features using AST analysis */ private async extractStructuralFeatures(content: string, extension: string): Promise<any> { const features = { astNodes: 0, complexity: 1, // Start with 1 for base complexity nestingDepth: 0, functionCount: 0, classCount: 0 }; try { // Try to parse with Babel for JavaScript/TypeScript if (['.js', '.jsx', '.ts', '.tsx'].includes(extension)) { const ast = parse(content, { sourceType: 'module', allowImportExportEverywhere: true, allowAwaitOutsideFunction: true, plugins: [ 'jsx', 'typescript', 'decorators-legacy', 'classProperties', 'objectRestSpread', 'asyncGenerators', 'optionalChaining', 'nullishCoalescingOperator' ] }); // Traverse AST and count features let currentDepth = 0; let maxDepth = 0; traverse(ast, { enter(path) { features.astNodes++; currentDepth++; maxDepth = Math.max(maxDepth, currentDepth); // Count complexity-contributing nodes if ( path.isIfStatement() || path.isWhileStatement() || path.isForStatement() || path.isSwitchCase() || path.isConditionalExpression() || path.isLogicalExpression() ) { features.complexity++; } // Count functions if ( path.isFunctionDeclaration() || path.isFunctionExpression() || path.isArrowFunctionExpression() || path.isMethodDefinition() ) { features.functionCount++; } // Count classes if (path.isClassDeclaration()) { features.classCount++; } }, exit() { currentDepth--; } }); features.nestingDepth = maxDepth; } else { // For non-JS languages, use simpler heuristics features = this.extractStructuralHeuristics(content); } } catch (error) { // Fallback to heuristic analysis if AST parsing fails console.warn(`AST parsing failed for ${extension}, using heuristics`); features = this.extractStructuralHeuristics(content); } return features; } /** * Extract structural features using heuristics (fallback method) */ private extractStructuralHeuristics(content: string): any { const lines = content.split('\n'); const features = { astNodes: lines.length * 2, // Rough estimate complexity: 1, nestingDepth: 0, functionCount: 0, classCount: 0 }; let currentIndent = 0; let maxIndent = 0; for (const line of lines) { const trimmed = line.trim(); const indent = line.length - line.trimStart().length; currentIndent = Math.floor(indent / 2); // Assume 2-space indentation maxIndent = Math.max(maxIndent, currentIndent); // Count complexity keywords const complexityKeywords = ['if', 'while', 'for', 'switch', 'case', 'catch', '&&', '||', '?']; for (const keyword of complexityKeywords) { if (trimmed.includes(keyword)) { features.complexity++; } } // Count functions (various languages) const functionKeywords = ['def ', 'function ', 'func ', 'public ', 'private ', 'protected ']; for (const keyword of functionKeywords) { if (trimmed.startsWith(keyword) || trimmed.includes(keyword + ' ')) { features.functionCount++; } } // Count classes const classKeywords = ['class ', 'interface ', 'struct ']; for (const keyword of classKeywords) { if (trimmed.startsWith(keyword)) { features.classCount++; } } } features.nestingDepth = maxIndent; return features; } /** * Check if identifier is a language keyword */ private isKeyword(identifier: string, extension: string): boolean { const keywords = { '.js': ['var', 'let', 'const', 'function', 'if', 'else', 'for', 'while', 'return', 'class', 'import', 'export'], '.ts': ['var', 'let', 'const', 'function', 'if', 'else', 'for', 'while', 'return', 'class', 'import', 'export', 'interface', 'type'], '.py': ['def', 'class', 'if', 'else', 'for', 'while', 'return', 'import', 'from', 'try', 'except'], '.java': ['public', 'private', 'class', 'interface', 'if', 'else', 'for', 'while', 'return', 'import'], '.cpp': ['int', 'void', 'class', 'if', 'else', 'for', 'while', 'return', 'include', 'namespace'], }; const langKeywords = keywords[extension as keyof typeof keywords] || []; return langKeywords.includes(identifier.toLowerCase()); } // ================================ // RELATIONSHIP EXTRACTION METHODS // ================================ /** * Extract all relationships from code content */ async extractRelationships(content: string, filePath: string, extension: string): Promise<CodeRelationships> { const relationships: CodeRelationships = { filePath, imports: [], exports: [], functionCalls: [], classInheritance: [], dependencies: [], compositions: [], lastAnalyzed: new Date() }; try { if (['.js', '.jsx', '.ts', '.tsx'].includes(extension)) { // JavaScript/TypeScript analysis using AST await this.extractJavaScriptRelationships(content, filePath, relationships); } else if (extension === '.py') { // Python analysis using regex and heuristics await this.extractPythonRelationships(content, filePath, relationships); } else { // Generic analysis for other languages await this.extractGenericRelationships(content, filePath, relationships); } console.log(`Extracted relationships for ${filePath}: ${relationships.imports.length} imports, ${relationships.functionCalls.length} calls`); } catch (error) { console.error(`Error extracting relationships from ${filePath}:`, error); } return relationships; } /** * Extract relationships from JavaScript/TypeScript using AST */ private async extractJavaScriptRelationships( content: string, filePath: string, relationships: CodeRelationships ): Promise<void> { try { const ast = parse(content, { sourceType: 'module', allowImportExportEverywhere: true, allowAwaitOutsideFunction: true, plugins: [ 'jsx', 'typescript', 'decorators-legacy', 'classProperties', 'objectRestSpread', 'asyncGenerators', 'optionalChaining', 'nullishCoalescingOperator' ] }); traverse(ast, { // Extract import statements ImportDeclaration: (path) => { const importPath = path.node.source.value; const symbols: string[] = []; let importType: 'default' | 'named' | 'namespace' | 'dynamic' = 'named'; path.node.specifiers.forEach(spec => { if (t.isImportDefaultSpecifier(spec)) { symbols.push(spec.local.name); importType = 'default'; } else if (t.isImportSpecifier(spec)) { symbols.push(spec.imported.type === 'Identifier' ? spec.imported.name : 'unknown'); } else if (t.isImportNamespaceSpecifier(spec)) { symbols.push(spec.local.name); importType = 'namespace'; } }); relationships.imports.push({ from: filePath, to: this.resolveImportPath(importPath, filePath), symbols, importType, lineNumber: path.node.loc?.start.line, statement: content.split('\n')[path.node.loc?.start.line! - 1] || '' }); }, // Extract export statements ExportNamedDeclaration: (path) => { if (path.node.declaration) { if (t.isFunctionDeclaration(path.node.declaration) && path.node.declaration.id) { relationships.exports.push({ from: filePath, symbol: path.node.declaration.id.name, exportType: 'named', lineNumber: path.node.loc?.start.line, statement: content.split('\n')[path.node.loc?.start.line! - 1] || '' }); } else if (t.isVariableDeclaration(path.node.declaration)) { path.node.declaration.declarations.forEach(decl => { if (t.isIdentifier(decl.id)) { relationships.exports.push({ from: filePath, symbol: decl.id.name, exportType: 'named', lineNumber: path.node.loc?.start.line, statement: content.split('\n')[path.node.loc?.start.line! - 1] || '' }); } }); } } }, // Extract default exports ExportDefaultDeclaration: (path) => { let symbolName = 'default'; if (t.isFunctionDeclaration(path.node.declaration) && path.node.declaration.id) { symbolName = path.node.declaration.id.name; } else if (t.isClassDeclaration(path.node.declaration) && path.node.declaration.id) { symbolName = path.node.declaration.id.name; } relationships.exports.push({ from: filePath, symbol: symbolName, exportType: 'default', lineNumber: path.node.loc?.start.line, statement: content.split('\n')[path.node.loc?.start.line! - 1] || '' }); }, // Extract class inheritance ClassDeclaration: (path) => { if (path.node.superClass && t.isIdentifier(path.node.superClass) && path.node.id) { relationships.classInheritance.push({ from: path.node.id.name, to: path.node.superClass.name, type: 'extends', lineNumber: path.node.loc?.start.line }); } // Extract implemented interfaces (TypeScript) if ('implements' in path.node && path.node.implements) { path.node.implements.forEach((impl: any) => { if (t.isIdentifier(impl.expression) && path.node.id) { relationships.classInheritance.push({ from: path.node.id.name, to: impl.expression.name, type: 'implements', lineNumber: path.node.loc?.start.line, interfaceName: impl.expression.name }); } }); } }, // Extract function calls CallExpression: (path) => { let functionName = ''; let objectName = ''; if (t.isIdentifier(path.node.callee)) { functionName = path.node.callee.name; } else if (t.isMemberExpression(path.node.callee)) { if (t.isIdentifier(path.node.callee.object)) { objectName = path.node.callee.object.name; } if (t.isIdentifier(path.node.callee.property)) { functionName = path.node.callee.property.name; } } if (functionName) { relationships.functionCalls.push({ from: filePath, to: functionName, object: objectName || undefined, lineNumber: path.node.loc?.start.line, callType: 'direct', // Could be enhanced to detect method vs direct calls context: this.findContainingFunction(path) || 'global' }); } } }); } catch (error) { console.error('Error in JavaScript AST analysis:', error); } } /** * Extract relationships from Python using regex patterns */ private async extractPythonRelationships( content: string, filePath: string, relationships: CodeRelationships ): Promise<void> { const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { const line = lines[i].trim(); const lineNumber = i + 1; // Extract import statements const importMatch = line.match(/^import\s+(.+)/); if (importMatch) { const modules = importMatch[1].split(',').map(m => m.trim()); modules.forEach(module => { relationships.imports.push({ from: filePath, to: module, symbols: [module], importType: 'named', lineNumber, statement: line }); }); } // Extract from imports const fromImportMatch = line.match(/^from\s+([^\s]+)\s+import\s+(.+)/); if (fromImportMatch) { const module = fromImportMatch[1]; const symbols = fromImportMatch[2].split(',').map(s => s.trim()); relationships.imports.push({ from: filePath, to: module, symbols, importType: 'named', lineNumber, statement: line }); } // Extract class inheritance const classMatch = line.match(/^class\s+(\w+)\s*\(\s*([^)]+)\s*\):/); if (classMatch) { const className = classMatch[1]; const parentClasses = classMatch[2].split(',').map(p => p.trim()); parentClasses.forEach(parent => { relationships.classInheritance.push({ from: className, to: parent, type: 'extends', lineNumber }); }); } // Extract function calls (simple pattern) const callMatches = line.matchAll(/(\w+)\.(\w+)\(/g); for (const match of callMatches) { relationships.functionCalls.push({ from: filePath, to: match[2], object: match[1], lineNumber, callType: 'method', context: this.findPythonFunction(lines, i) || 'global' }); } } } /** * Extract relationships using generic patterns for unsupported languages */ private async extractGenericRelationships( content: string, filePath: string, relationships: CodeRelationships ): Promise<void> { const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { const line = lines[i].trim(); const lineNumber = i + 1; // Generic import patterns const importPatterns = [ /^#include\s*[<"]([^>"]+)[>"]/, // C/C++ /^import\s+([^;]+);/, // Java /^using\s+([^;]+);/, // C# ]; for (const pattern of importPatterns) { const match = line.match(pattern); if (match) { relationships.dependencies.push({ from: filePath, to: match[1], dependencyType: 'builtin', usage: ['include'], }); } } // Generic class inheritance patterns const inheritancePatterns = [ /class\s+(\w+)\s*:\s*public\s+(\w+)/, // C++ /class\s+(\w+)\s+extends\s+(\w+)/, // Java ]; for (const pattern of inheritancePatterns) { const match = line.match(pattern); if (match) { relationships.classInheritance.push({ from: match[1], to: match[2], type: 'extends', lineNumber }); } } } } /** * Resolve import path relative to current file */ private resolveImportPath(importPath: string, currentFile: string): string { if (importPath.startsWith('./') || importPath.startsWith('../')) { // Relative import const currentDir = path.dirname(currentFile); return path.resolve(currentDir, importPath); } else { // Absolute or node_modules import return importPath; } } /** * Find containing function name for a Babel path */ private findContainingFunction(path: any): string | null { let parent = path.parent; while (parent) { if (t.isFunctionDeclaration(parent) && parent.id) { return parent.id.name; } else if (t.isMethodDefinition(parent) && t.isIdentifier(parent.key)) { return parent.key.name; } parent = parent.parent; } return null; } /** * Find containing Python function name */ private findPythonFunction(lines: string[], currentLine: number): string | null { for (let i = currentLine; i >= 0; i--) { const line = lines[i].trim(); const funcMatch = line.match(/^def\s+(\w+)/); if (funcMatch) { return funcMatch[1]; } } return null; } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ulasbilgen/sacl'

If you have feedback or need assistance with the MCP directory API, please join our Discord server