Skip to main content
Glama
repository-analyzer.ts (21.8 kB)
import * as fs from 'fs';
import * as path from 'path';
import {
  COMPLEXITY_THRESHOLDS,
  CONFIG_FILE_MAPPINGS,
  DEPENDENCY_MAPPINGS,
  EXTENSION_LANGUAGE_MAPPINGS,
  LAYER_MAPPINGS,
} from './repository-analyzer-config';
import {
  DEFAULT_SECURITY_CONFIG,
  MemoryMonitor,
  ResourceManager,
  SecurityConfig,
  validatePath,
} from './security.utils';

const { readFile, readdir, stat, access } = fs.promises;

/**
 * Technologies detected in a repository.
 */
export interface TechStack {
  /** Programming languages inferred from package.json, config files and file extensions. */
  languages: string[];
  /** Frameworks resolved from package.json dependencies. */
  frameworks: string[];
  /** Databases resolved from package.json dependencies. */
  databases: string[];
  /** Tools resolved from package.json dependencies and config files. */
  tools: string[];
  /** 'pnpm', 'yarn' or 'npm', chosen from whichever lock file is present. */
  packageManager?: string;
  /** Set to 'Node.js' when package.json points at Node (engines.node or @types/node). */
  runtime?: string;
  /** Presence flags for selected package.json fields. */
  packageMetadata?: {
    hasMainField?: boolean;
    hasExportsField?: boolean;
    hasTypesField?: boolean;
    hasBinField?: boolean;
    isPrivate?: boolean;
  };
}

/**
 * Architecture summary derived from the repository's top-level directories.
 */
export interface ArchitectureInfo {
  /** Single headline pattern chosen from `patterns`. */
  pattern: string;
  /** Layers resolved from top-level directory names via LAYER_MAPPINGS. */
  layers: string[];
  /** All architectural patterns that were detected. */
  patterns: string[];
  complexity: 'simple' | 'moderate' | 'complex';
}

/**
 * Complete result of a repository analysis.
 */
export interface RepositoryMetadata {
  techStack: TechStack;
  architecture: ArchitectureInfo;
  projectType: string;
  size: {
    files: number;
    linesOfCode?: number;
  };
  /** ISO-8601 timestamp taken from the root directory's birthtime (or ctime). */
  createdDate: string;
}

/**
 * Inspects a repository on disk and summarizes its tech stack, architecture,
 * project type and size.
 *
 * All file-system access goes through `validatePath` / `ResourceManager` from
 * `./security.utils`, and traversal is bounded by the supplied `SecurityConfig`
 * (`maxFiles`, `maxDirectoryDepth`, `maxFileSize`).
 */
export class RepositoryAnalyzer {
  private rootPath: string;
  private logger?: {
    debug: (message: string) => void;
    error: (message: string, context?: any) => void;
  };
  private securityConfig: SecurityConfig;
  private resourceManager: ResourceManager;

  /**
   * File list shared by every step of the analysis currently in flight, so the
   * repository is walked once per `analyzeRepository()` call instead of once
   * per consumer. Reset at the start and end of each analysis.
   */
  private fileListPromise?: Promise<string[]>;

  /**
   * @param rootPath - Repository root; resolved to an absolute path.
   * @param logger - Optional sink for debug/error messages. Nothing is logged when omitted.
   * @param securityConfig - Traversal limits; defaults to `DEFAULT_SECURITY_CONFIG`.
   */
  constructor(
    rootPath: string,
    logger?: { debug: (message: string) => void; error: (message: string, context?: any) => void },
    securityConfig: SecurityConfig = DEFAULT_SECURITY_CONFIG,
  ) {
    // Resolve to an absolute, normalized path; every later validatePath call
    // is made relative to this root.
    this.rootPath = path.resolve(rootPath);
    this.logger = logger;
    this.securityConfig = securityConfig;
    this.resourceManager = new ResourceManager();
  }

  /**
   * Render an unknown thrown value as a message string.
   */
  private toErrorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Append `value` to `list` unless it is already present.
   *
   * Only non-empty strings are accepted. Values come from lookups keyed by
   * untrusted names (dependency names, directory names), and a key such as
   * `constructor` would otherwise resolve to an inherited Object.prototype
   * member rather than a mapping entry.
   */
  private addUnique(list: string[], value: unknown): void {
    if (typeof value !== 'string' || value.length === 0 || list.includes(value)) {
      return;
    }
    list.push(value);
  }

  /**
   * Check whether a path exists inside the repository root.
   *
   * @returns `false` when the path fails validation or cannot be accessed.
   */
  private async fileExists(filePath: string): Promise<boolean> {
    try {
      // Validate path before checking existence
      const safePath = validatePath(filePath, this.rootPath);
      await access(safePath, fs.constants.F_OK);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Run the full analysis.
   *
   * @returns Tech stack, architecture, inferred project type, file count and creation date.
   * @throws Propagates errors from file-extension analysis, directory traversal
   *   (too many unreadable directories) and the memory monitor checks.
   */
  async analyzeRepository(): Promise<RepositoryMetadata> {
    const memoryMonitor = new MemoryMonitor();
    // Never reuse a listing from a previous run; the repository may have changed.
    this.fileListPromise = undefined;

    try {
      const techStack = await this.analyzeTechStack();

      // Check memory usage after tech stack analysis
      memoryMonitor.checkMemoryUsage();

      const files = await this.getFileListSecurely();
      const architecture = await this.analyzeArchitecture(techStack, files.length);

      // Check memory usage after file analysis
      memoryMonitor.checkMemoryUsage();

      const projectInfo = await this.analyzeProjectInfo();
      const projectType = this.inferProjectType(techStack, architecture);

      return {
        techStack,
        architecture,
        projectType,
        size: projectInfo.size,
        createdDate: projectInfo.createdDate,
      };
    } finally {
      // Always drop the cached listing and clean up resources
      this.fileListPromise = undefined;
      await this.resourceManager.cleanup();
    }
  }

  /**
   * Build the tech stack from package.json, known config files and file extensions, in that order.
   */
  private async analyzeTechStack(): Promise<TechStack> {
    const techStack: TechStack = {
      languages: [],
      frameworks: [],
      databases: [],
      tools: [],
    };

    await this.analyzePackageJsonSecurely(techStack);
    await this.analyzeConfigFiles(techStack);
    await this.analyzeFileExtensions(techStack);

    return techStack;
  }

  /**
   * Populate `techStack` from the root package.json, if there is one.
   *
   * Best-effort: a missing file is skipped quietly and any failure is logged
   * rather than thrown, so non-Node repositories can still be analyzed.
   */
  private async analyzePackageJsonSecurely(techStack: TechStack): Promise<void> {
    try {
      const packageJsonPath = path.join(this.rootPath, 'package.json');

      // A repository without package.json is an expected case (e.g. Rust or
      // Python projects), not an error worth reporting through logger.error.
      if (!(await this.fileExists(packageJsonPath))) {
        this.logger?.debug(
          '[RepositoryAnalyzer] No package.json found, skipping package analysis',
        );
        return;
      }

      // Use secure file reading with resource management
      const packageJsonContent = await this.resourceManager.readFileSecurely(packageJsonPath);

      // Use safe JSON parsing with validation
      const packageJson = this.safeParsePackageJson(packageJsonContent);
      if (!packageJson) {
        return; // Failed to parse, continue without package.json data
      }

      // Detect package manager from the lock file present (pnpm > yarn > npm)
      if (await this.fileExists(path.join(this.rootPath, 'pnpm-lock.yaml'))) {
        techStack.packageManager = 'pnpm';
      } else if (await this.fileExists(path.join(this.rootPath, 'yarn.lock'))) {
        techStack.packageManager = 'yarn';
      } else if (await this.fileExists(path.join(this.rootPath, 'package-lock.json'))) {
        techStack.packageManager = 'npm';
      }

      // Runtime and dev dependencies are treated alike for detection purposes
      const dependencies = { ...packageJson.dependencies, ...packageJson.devDependencies };

      this.extractFrameworksFromDependencies(dependencies, techStack);
      this.extractDatabasesFromDependencies(dependencies, techStack);
      this.extractToolsFromDependencies(dependencies, techStack);

      // Record only the presence of fields, never their contents
      techStack.packageMetadata = {
        hasMainField: Boolean(packageJson.main),
        hasExportsField: Boolean(packageJson.exports),
        hasTypesField: Boolean(packageJson.types || packageJson.typings),
        hasBinField: Boolean(packageJson.bin),
        isPrivate: Boolean(packageJson.private),
      };

      // Detect TypeScript if present
      if (
        dependencies.typescript ||
        dependencies['@types/node'] ||
        (await this.fileExists(path.join(this.rootPath, 'tsconfig.json')))
      ) {
        this.addUnique(techStack.languages, 'TypeScript');
      }

      // Detect JavaScript (always present if package.json exists)
      this.addUnique(techStack.languages, 'JavaScript');

      // Detect Node.js runtime
      if (dependencies['@types/node'] || packageJson.engines?.node) {
        techStack.runtime = 'Node.js';
      }
    } catch (error) {
      const errorMessage = `Failed to analyze package.json: ${this.toErrorMessage(error)}`;
      this.logger?.error(`[RepositoryAnalyzer] ${errorMessage}`, {
        error: error instanceof Error ? error.toString() : String(error),
        rootPath: this.rootPath,
      });
    }
  }

  /**
   * Parse package.json content defensively.
   *
   * @returns The parsed object, or `null` when the content is empty, larger
   *   than 1 MB, not valid JSON, or not a plain (non-array) object.
   */
  private safeParsePackageJson(content: string): any {
    try {
      // Basic validation
      if (!content || typeof content !== 'string') {
        return null;
      }

      // Length validation (package.json should not be huge)
      if (content.length > 1024 * 1024) {
        // 1MB max; thrown so the rejection is logged by the catch below
        throw new Error('package.json file too large');
      }

      const parsed = JSON.parse(content);

      // Validate it's an object with expected structure
      if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return null;
      }

      return parsed;
    } catch (error) {
      this.logger?.error('[RepositoryAnalyzer] Failed to parse package.json', {
        error: this.toErrorMessage(error),
      });
      return null;
    }
  }

  /**
   * Detect languages and tools from well-known config files in the repository root.
   *
   * Only the files listed in `CONFIG_FILE_MAPPINGS` are probed.
   */
  private async analyzeConfigFiles(techStack: TechStack): Promise<void> {
    for (const config of CONFIG_FILE_MAPPINGS) {
      if (!(await this.fileExists(path.join(this.rootPath, config.file)))) {
        continue;
      }

      if (config.file === 'tsconfig.json') {
        this.addUnique(techStack.languages, 'TypeScript');
      } else if (config.file.startsWith('Cargo')) {
        this.addUnique(techStack.languages, 'Rust');
      } else if (config.file.includes('requirements') || config.file.includes('pyproject')) {
        this.addUnique(techStack.languages, 'Python');
      } else if (config.file.includes('Docker')) {
        // De-duplicated like every other list, in case several Docker files
        // indicate the same tool.
        this.addUnique(techStack.tools, config.indicates);
      }
    }
  }

  /**
   * Detect languages from the extensions of the files found in the repository.
   *
   * @throws Error when the file list cannot be produced; an incomplete language
   *   list must not be reported as a complete one.
   */
  private async analyzeFileExtensions(techStack: TechStack): Promise<void> {
    try {
      const files = await this.getFileListSecurely();
      const extensions = new Set<string>();

      // Use security config maxFiles instead of hardcoded limit
      const maxFiles = this.securityConfig.maxFiles;
      const filesToAnalyze = files.slice(0, maxFiles);

      // Warn if we're truncating analysis due to file limit
      if (files.length > maxFiles) {
        const warningMessage = `Repository analysis truncated: ${files.length} files found, analyzing only first ${maxFiles} files. Consider increasing maxFiles limit for complete analysis.`;
        this.logger?.debug(`[RepositoryAnalyzer] ${warningMessage}`);
      }

      for (const file of filesToAnalyze) {
        const ext = path.extname(file).toLowerCase();
        if (ext) {
          extensions.add(ext);
        }
      }

      // Map extensions to languages using externalized mappings
      for (const ext of extensions) {
        const language =
          EXTENSION_LANGUAGE_MAPPINGS[ext as keyof typeof EXTENSION_LANGUAGE_MAPPINGS];
        this.addUnique(techStack.languages, language);
      }
    } catch (error) {
      // Critical error - fail analysis rather than continue silently
      const errorMessage = `Failed to analyze file extensions: ${this.toErrorMessage(error)}`;
      this.logger?.error(`[RepositoryAnalyzer] ${errorMessage}`, {
        error: error instanceof Error ? error.toString() : String(error),
        rootPath: this.rootPath,
      });

      // Throw error to prevent incomplete analysis from being treated as complete
      throw new Error(`Repository analysis failed during file extension analysis: ${errorMessage}`);
    }
  }

  /**
   * Derive layers, patterns and a complexity rating from the top-level directories.
   *
   * Directory names are compared case-insensitively, both for layer lookup and
   * for pattern detection.
   *
   * @param techStack - Optional; its framework/database/tool counts feed the complexity score.
   * @param fileCount - Optional; large file counts raise the complexity score.
   */
  private async analyzeArchitecture(
    techStack?: TechStack,
    fileCount?: number,
  ): Promise<ArchitectureInfo> {
    const directories = await this.getDirectories(this.rootPath);
    const lowered = directories.map((dir) => dir.toLowerCase());
    const layers: string[] = [];
    const patterns: string[] = [];

    // Common architectural layers using externalized mappings
    for (const dir of lowered) {
      const layer: unknown = LAYER_MAPPINGS[dir as keyof typeof LAYER_MAPPINGS];
      // Guard against inherited members such as `constructor` or `__proto__`
      // matching a directory name; only real mapping entries are strings.
      if (typeof layer === 'string' && layer.length > 0) {
        layers.push(layer);
      }
    }

    // Detect patterns
    if (lowered.some((dir) => dir.includes('service'))) {
      patterns.push('Service-Oriented');
    }
    if (lowered.includes('mcp') || lowered.includes('tools')) {
      patterns.push('MCP Server');
    }
    if (lowered.includes('cli')) {
      patterns.push('CLI Application');
    }
    if (lowered.some((dir) => dir.includes('component'))) {
      patterns.push('Component-Based');
    }
    if (layers.length >= 4) {
      patterns.push('Layered Architecture');
    }

    const complexity = this.assessComplexity(directories, layers, techStack, fileCount);
    const pattern = this.inferArchitecturalPattern(patterns, layers);

    return {
      pattern,
      layers,
      patterns,
      complexity,
    };
  }

  /**
   * Report the number of files found and the root directory's creation date.
   *
   * Best-effort: on any failure it falls back to zero files and the current time.
   */
  private async analyzeProjectInfo(): Promise<{ size: { files: number }; createdDate: string }> {
    try {
      const files = await this.getFileListSecurely();
      const stats = await stat(this.rootPath);

      return {
        size: {
          files: files.length,
        },
        createdDate: (stats.birthtime || stats.ctime).toISOString(),
      };
    } catch (error) {
      this.logger?.debug(
        `[RepositoryAnalyzer] Failed to collect project info, using defaults: ${this.toErrorMessage(error)}`,
      );
      return {
        size: { files: 0 },
        createdDate: new Date().toISOString(),
      };
    }
  }

  /**
   * Classify the project. Checks run in priority order and the first match wins:
   * MCP Server, CLI Tool, Web Application, Web Server, Library/Framework, Application.
   */
  private inferProjectType(techStack: TechStack, architecture: ArchitectureInfo): string {
    if (architecture.patterns.includes('MCP Server')) {
      return 'MCP Server';
    }
    if (architecture.patterns.includes('CLI Application')) {
      return 'CLI Tool';
    }

    // A `bin` field in package.json also marks a CLI tool
    if (techStack.packageMetadata?.hasBinField) {
      return 'CLI Tool';
    }

    if (
      techStack.frameworks.some(
        (f) => f.includes('React') || f.includes('Vue') || f.includes('Angular'),
      )
    ) {
      return 'Web Application';
    }
    if (techStack.frameworks.some((f) => f.includes('Express') || f.includes('Fastify'))) {
      return 'Web Server';
    }

    // Library detection: testing tools + library entry-point fields + not private
    const hasTestingTools = techStack.tools.includes('Jest') || techStack.tools.includes('Vitest');
    const hasLibraryIndicators =
      techStack.packageMetadata?.hasMainField ||
      techStack.packageMetadata?.hasExportsField ||
      techStack.packageMetadata?.hasTypesField;
    const isNotPrivate = !techStack.packageMetadata?.isPrivate;

    if (hasTestingTools && hasLibraryIndicators && isNotPrivate) {
      return 'Library/Framework';
    }

    return 'Application';
  }

  /**
   * Pick the headline architecture name from the detected patterns.
   *
   * @param patterns - Detected patterns; checked in a fixed priority order.
   * @param layers - Currently unused; kept so the signature stays stable.
   */
  private inferArchitecturalPattern(patterns: string[], layers: string[]): string {
    if (patterns.includes('MCP Server')) {
      return 'MCP Server Architecture';
    }
    if (patterns.includes('Service-Oriented')) {
      return 'Service-Oriented Architecture';
    }
    if (patterns.includes('Layered Architecture')) {
      return 'Layered Architecture';
    }
    if (patterns.includes('Component-Based')) {
      return 'Component-Based Architecture';
    }
    return 'Modular Architecture';
  }

  /**
   * Rate project complexity.
   *
   * The score is the number of top-level directories plus detected layers,
   * plus one point per three frameworks/databases/tools, plus one point per
   * 250 files once the repository exceeds 500 files. The score is then
   * compared against the externalized `COMPLEXITY_THRESHOLDS`.
   */
  private assessComplexity(
    directories: string[],
    layers: string[],
    techStack?: TechStack,
    fileCount?: number,
  ): 'simple' | 'moderate' | 'complex' {
    let complexityScore = directories.length + layers.length;

    // Add dependency complexity factor
    if (techStack) {
      const totalDeps =
        techStack.frameworks.length + techStack.databases.length + techStack.tools.length;
      complexityScore += Math.floor(totalDeps / 3); // Weight dependencies less heavily
    }

    // Add file count factor for very large projects
    if (fileCount && fileCount > 500) {
      complexityScore += Math.floor(fileCount / 250);
    }

    if (complexityScore <= COMPLEXITY_THRESHOLDS.simple.max) {
      return 'simple';
    }
    if (complexityScore <= COMPLEXITY_THRESHOLDS.moderate.max) {
      return 'moderate';
    }
    return 'complex';
  }

  /**
   * List the non-hidden immediate subdirectories of `dirPath`.
   *
   * Best-effort: entries that fail validation or `stat` are skipped, and an
   * unreadable or invalid `dirPath` yields an empty list.
   *
   * @returns Directory names (not full paths).
   */
  private async getDirectories(dirPath: string): Promise<string[]> {
    try {
      // Validate directory path
      const safeDirPath = validatePath(dirPath, this.rootPath);
      const entries = await readdir(safeDirPath);
      const directories: string[] = [];

      for (const entry of entries) {
        if (entry.startsWith('.')) {
          continue; // Skip hidden directories
        }

        try {
          const entryPath = path.join(safeDirPath, entry);
          // Additional security check - ensure we don't traverse outside root
          validatePath(entryPath, this.rootPath);

          const entryStat = await stat(entryPath);
          if (entryStat.isDirectory()) {
            directories.push(entry);
          }
        } catch {
          // Skip entries that cause errors (could be permission issues or invalid paths)
        }
      }

      return directories;
    } catch (error) {
      this.logger?.debug(
        `[RepositoryAnalyzer] Failed to list directories in ${dirPath}: ${this.toErrorMessage(error)}`,
      );
      return [];
    }
  }

  /**
   * Return the repository's file list, walking the tree at most once per analysis.
   *
   * The first caller starts the traversal; later callers within the same
   * `analyzeRepository()` run share its result (or its failure).
   */
  private async getFileListSecurely(): Promise<string[]> {
    if (!this.fileListPromise) {
      this.fileListPromise = this.collectFileList();
    }
    return this.fileListPromise;
  }

  /**
   * Breadth-first walk of the repository, bounded by the security config.
   *
   * Skips hidden entries, paths the resource manager says to ignore,
   * directories deeper than `maxDirectoryDepth` and files larger than
   * `maxFileSize`; stops after `maxFiles` files.
   *
   * @returns Full paths of the files found.
   * @throws Error after 5 directories could not be read, and whatever the
   *   memory monitor raises from `checkMemoryUsage`.
   */
  private async collectFileList(): Promise<string[]> {
    const files: string[] = [];
    const queue: string[] = [this.rootPath];
    const memoryMonitor = new MemoryMonitor(200); // 200MB limit for file listing
    const maxCriticalErrors = 5; // Fail after 5 critical errors
    let fileCount = 0;
    let criticalErrors = 0;

    while (queue.length > 0 && fileCount < this.securityConfig.maxFiles) {
      const currentPath = queue.shift();
      if (currentPath === undefined) {
        break;
      }

      // Check memory usage periodically
      if (fileCount % 100 === 0) {
        memoryMonitor.checkMemoryUsage();
      }

      // Depth is the number of path segments below the root (root itself is 0)
      const relativePath = path.relative(this.rootPath, currentPath);
      const depth = relativePath ? relativePath.split(path.sep).length : 0;
      if (depth > this.securityConfig.maxDirectoryDepth) {
        continue; // Skip directories that are too deep
      }

      try {
        // Validate current path
        validatePath(currentPath, this.rootPath);

        const entries = await readdir(currentPath);

        for (const entry of entries) {
          if (entry.startsWith('.')) {
            continue; // Skip hidden files/directories
          }

          try {
            const entryPath = path.join(currentPath, entry);

            // Validate entry path and check if it should be ignored
            validatePath(entryPath, this.rootPath);

            const relativeEntryPath = path.relative(this.rootPath, entryPath);
            if (this.resourceManager.shouldIgnorePath(relativeEntryPath, this.securityConfig)) {
              continue;
            }

            const entryStat = await stat(entryPath);

            if (entryStat.isDirectory()) {
              queue.push(entryPath);
            } else if (entryStat.isFile()) {
              // Check file size before adding to list
              if (entryStat.size <= this.securityConfig.maxFileSize) {
                files.push(entryPath);
                fileCount++;

                // Stop scanning this directory once the limit is hit; the
                // outer loop condition then ends the traversal.
                if (fileCount >= this.securityConfig.maxFiles) {
                  this.logger?.debug(
                    `[RepositoryAnalyzer] Hit file limit (${this.securityConfig.maxFiles}), stopping traversal`,
                  );
                  break;
                }
              }
            }
          } catch {
            // Skip problematic entries but continue processing
          }
        }
      } catch (error) {
        criticalErrors++;
        const errorMessage = `Failed to read directory ${currentPath}: ${this.toErrorMessage(error)}`;

        this.logger?.error(`[RepositoryAnalyzer] ${errorMessage}`, {
          error: error instanceof Error ? error.toString() : String(error),
          currentPath,
          rootPath: this.rootPath,
          criticalErrors,
        });

        // Fail fast if too many critical errors occur
        if (criticalErrors >= maxCriticalErrors) {
          const failureMessage = `Repository analysis failed: ${criticalErrors} critical directory read errors encountered. This may indicate file system corruption, permission issues, or other serious problems.`;
          this.logger?.error(`[RepositoryAnalyzer] ${failureMessage}`);
          throw new Error(failureMessage);
        }
      }
    }

    if (this.logger && fileCount >= this.securityConfig.maxFiles) {
      this.logger.debug(
        `[RepositoryAnalyzer] File traversal completed with limit: ${fileCount} files processed`,
      );
    }

    // Report final statistics
    if (this.logger && (criticalErrors > 0 || fileCount >= this.securityConfig.maxFiles)) {
      this.logger.debug(
        `[RepositoryAnalyzer] Traversal completed: ${fileCount} files found, ${criticalErrors} directory errors encountered`,
      );
    }

    return files;
  }

  /**
   * Add every framework that `DEPENDENCY_MAPPINGS.frameworks` associates with
   * a dependency name to `techStack.frameworks` (no duplicates).
   */
  private extractFrameworksFromDependencies(
    dependencies: Record<string, any>,
    techStack: TechStack,
  ): void {
    if (!dependencies || typeof dependencies !== 'object') {
      return;
    }

    for (const dep of Object.keys(dependencies)) {
      const framework =
        DEPENDENCY_MAPPINGS.frameworks[dep as keyof typeof DEPENDENCY_MAPPINGS.frameworks];
      this.addUnique(techStack.frameworks, framework);
    }
  }

  /**
   * Add every database that `DEPENDENCY_MAPPINGS.databases` associates with
   * a dependency name to `techStack.databases` (no duplicates).
   */
  private extractDatabasesFromDependencies(
    dependencies: Record<string, any>,
    techStack: TechStack,
  ): void {
    if (!dependencies || typeof dependencies !== 'object') {
      return;
    }

    for (const dep of Object.keys(dependencies)) {
      const database =
        DEPENDENCY_MAPPINGS.databases[dep as keyof typeof DEPENDENCY_MAPPINGS.databases];
      this.addUnique(techStack.databases, database);
    }
  }

  /**
   * Add every tool that `DEPENDENCY_MAPPINGS.tools` associates with
   * a dependency name to `techStack.tools` (no duplicates).
   */
  private extractToolsFromDependencies(
    dependencies: Record<string, any>,
    techStack: TechStack,
  ): void {
    if (!dependencies || typeof dependencies !== 'object') {
      return;
    }

    for (const dep of Object.keys(dependencies)) {
      const tool = DEPENDENCY_MAPPINGS.tools[dep as keyof typeof DEPENDENCY_MAPPINGS.tools];
      this.addUnique(techStack.tools, tool);
    }
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/KuzuMem-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.