/**
* @fileOverview: AST-based file analyzer using ast-grep and schemas
* @module: ASTAnalyzer
* @keyFunctions:
* - ASTAnalyzer: Base class for AST-based file analysis
* - analyzeStructure(): Extract structural information from AST
* - extractNodes(): Extract specific node types from AST
* - findPatterns(): Find structural patterns in code
* @context: Provides AST-based analysis for various file types using schemas
*/
import * as fs from 'fs';
import * as path from 'path';
import { logger } from '../../../utils/logger';
import { SchemaLoader, LanguageSchema } from './schemaLoader';
/**
* AST node representation
*/
export interface ASTNode {
type: string;
text: string;
startLine: number;
endLine: number;
startColumn: number;
endColumn: number;
children?: ASTNode[];
fields?: Record<string, any>;
}
/**
* Structure analysis result
*/
export interface StructureAnalysis {
depth: number; // Maximum nesting depth
nodeCount: number; // Total number of nodes
nodeTypes: Record<string, number>; // Count of each node type
complexity: 'low' | 'medium' | 'high';
patterns: string[]; // Detected structural patterns
}
/**
* Symbol extracted from AST
*/
export interface ASTSymbol {
name: string;
type: string;
line: number;
column: number;
scope?: string;
exported?: boolean;
metadata?: Record<string, any>;
}
/**
* File analysis result
*/
export interface ASTAnalysisResult {
language: string;
structure: StructureAnalysis;
symbols: ASTSymbol[];
raw?: any; // Raw AST for advanced usage
}
/**
* Base class for AST-based file analysis
*/
export class ASTAnalyzer {
protected schemaLoader: SchemaLoader;
protected schema: LanguageSchema | null = null;
constructor(protected language: string) {
this.schemaLoader = SchemaLoader.getInstance();
}
/**
* Initialize the analyzer by loading the schema
*/
public async initialize(): Promise<boolean> {
this.schema = await this.schemaLoader.loadSchema(this.language);
if (!this.schema) {
logger.warn('Failed to load schema', { language: this.language });
return false;
}
return true;
}
/**
* Analyze a file and extract AST information
*/
public async analyzeFile(filePath: string): Promise<ASTAnalysisResult | null> {
if (!this.schema) {
await this.initialize();
}
if (!this.schema) {
return null;
}
try {
const content = await fs.promises.readFile(filePath, 'utf-8');
return this.analyzeContent(content, filePath);
} catch (error) {
logger.error('Failed to analyze file', {
filePath,
error: error instanceof Error ? error.message : String(error),
});
return null;
}
}
/**
* Analyze content string
*/
public async analyzeContent(
content: string,
filePath?: string
): Promise<ASTAnalysisResult | null> {
if (!this.schema) {
await this.initialize();
}
if (!this.schema) {
return null;
}
try {
// Use ast-grep to parse the content
const ast = await this.parseWithAstGrep(content);
if (!ast) {
return null;
}
const structure = this.analyzeStructure(ast);
const symbols = this.extractSymbols(ast);
return {
language: this.language,
structure,
symbols,
raw: ast,
};
} catch (error) {
logger.error('Failed to analyze content', {
language: this.language,
error: error instanceof Error ? error.message : String(error),
});
return null;
}
}
/**
* Parse content using ast-grep
*/
protected async parseWithAstGrep(content: string): Promise<any> {
// This would integrate with ast-grep
// For now, we'll use a placeholder that delegates to subclasses
// Real implementation would call ast-grep CLI or use its library
logger.debug('Parsing with ast-grep', { language: this.language });
// Subclasses should override this method for language-specific parsing
return this.parseLanguageSpecific(content);
}
/**
* Language-specific parsing (to be overridden by subclasses)
*/
protected parseLanguageSpecific(content: string): any {
// Default implementation: try to use native parsers
logger.warn('No language-specific parser implemented', { language: this.language });
return null;
}
/**
* Analyze structure of AST
*/
protected analyzeStructure(ast: any): StructureAnalysis {
const analysis: StructureAnalysis = {
depth: 0,
nodeCount: 0,
nodeTypes: {},
complexity: 'low',
patterns: [],
};
// Calculate structure metrics
const traverse = (node: any, depth: number = 0) => {
if (!node) return;
analysis.nodeCount++;
analysis.depth = Math.max(analysis.depth, depth);
// Count node types
const nodeType = node.type || 'unknown';
analysis.nodeTypes[nodeType] = (analysis.nodeTypes[nodeType] || 0) + 1;
// Traverse children
if (node.children && Array.isArray(node.children)) {
for (const child of node.children) {
traverse(child, depth + 1);
}
}
};
traverse(ast);
// Determine complexity
if (analysis.depth > 10 || analysis.nodeCount > 100) {
analysis.complexity = 'high';
} else if (analysis.depth > 5 || analysis.nodeCount > 50) {
analysis.complexity = 'medium';
}
// Detect patterns
analysis.patterns = this.detectPatterns(analysis);
return analysis;
}
/**
* Detect structural patterns
*/
protected detectPatterns(structure: StructureAnalysis): string[] {
const patterns: string[] = [];
// Deep nesting
if (structure.depth > 8) {
patterns.push('deep-nesting');
}
// Large file
if (structure.nodeCount > 200) {
patterns.push('large-file');
}
// Add more pattern detection as needed
return patterns;
}
/**
* Extract symbols from AST (to be overridden by subclasses)
*/
protected extractSymbols(ast: any): ASTSymbol[] {
// Default implementation: extract basic symbols
// Subclasses should override for language-specific symbol extraction
logger.debug('Extracting symbols', { language: this.language });
return [];
}
/**
* Find nodes of specific type
*/
protected findNodesByType(ast: any, nodeType: string): any[] {
const results: any[] = [];
const traverse = (node: any) => {
if (!node) return;
if (node.type === nodeType) {
results.push(node);
}
if (node.children && Array.isArray(node.children)) {
for (const child of node.children) {
traverse(child);
}
}
};
traverse(ast);
return results;
}
/**
* Find nodes matching a pattern
*/
protected findNodesMatching(ast: any, predicate: (node: any) => boolean): any[] {
const results: any[] = [];
const traverse = (node: any) => {
if (!node) return;
if (predicate(node)) {
results.push(node);
}
if (node.children && Array.isArray(node.children)) {
for (const child of node.children) {
traverse(child);
}
}
};
traverse(ast);
return results;
}
/**
* Get node text content
*/
protected getNodeText(node: any): string {
return node.text || '';
}
/**
* Get node location
*/
protected getNodeLocation(node: any): { line: number; column: number } {
return {
line: node.startLine || 0,
column: node.startColumn || 0,
};
}
}