MCP Codebase Index

Overview Schema Related Servers Score Discussions

indexer.ts•10.3 KiB

// src/core/indexer.ts
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import type { CodeChunk, FileMetadata } from '../types/index.js';

/**
 * Detects changed files in a repository via content hashes and splits source
 * files into size-bounded chunks using regex-based structure detection.
 */
export class CodeIndexer {
  /**
   * Default upper bound (UTF-8 bytes) for one chunk's content.
   * The original author's note: 20KB is a safe limit, Gemini allows 36KB.
   */
  private static readonly DEFAULT_MAX_CHUNK_SIZE = 20000;

  /** Maps a lower-cased file extension to the language label stored on chunks. */
  private static readonly LANGUAGE_BY_EXTENSION: Record<string, string> = {
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.tsx': 'typescript',
    '.jsx': 'javascript',
    '.java': 'java',
    '.go': 'go',
    '.rs': 'rust',
    '.cpp': 'cpp',
    '.c': 'c',
    '.cs': 'csharp',
    '.rb': 'ruby',
    '.php': 'php',
    '.swift': 'swift',
    '.kt': 'kotlin',
    '.dart': 'dart',
    '.vue': 'vue',
    '.svelte': 'svelte'
  };

  /** @param repoPath Root directory that relative file paths are resolved against. */
  constructor(private repoPath: string) {}

  /**
   * Calculate the MD5 hash of a file's content for change detection.
   *
   * @param filePath Path of the file to read.
   * @returns Hex digest of the content, or `''` when the file cannot be read
   *          (the error is logged, not thrown).
   */
  calculateFileHash(filePath: string): string {
    try {
      const content = fs.readFileSync(filePath, 'utf-8');
      return crypto.createHash('md5').update(content).digest('hex');
    } catch (error) {
      console.error(`Error calculating hash for ${filePath}:`, error);
      return '';
    }
  }

  /**
   * Categorize files into new, modified, unchanged and deleted.
   *
   * @param allFiles Paths of the files currently on disk; each is made relative
   *                 to `repoPath` before being looked up in `indexedFiles`.
   * @param indexedFiles Previously indexed metadata keyed by repo-relative path.
   * @returns `newFiles`, `modifiedFiles` and `unchangedFiles` hold entries of
   *          `allFiles`; `deletedFiles` holds keys of `indexedFiles` whose file
   *          no longer exists under `repoPath`.
   */
  categorizeFiles(
    allFiles: string[],
    indexedFiles: Map<string, FileMetadata>
  ): {
    newFiles: string[];
    modifiedFiles: string[];
    unchangedFiles: string[];
    deletedFiles: string[];
  } {
    const newFiles: string[] = [];
    const modifiedFiles: string[] = [];
    const unchangedFiles: string[] = [];
    const deletedFiles: string[] = [];

    // Deleted: present in the index but no longer on disk.
    for (const indexedPath of indexedFiles.keys()) {
      if (!fs.existsSync(path.join(this.repoPath, indexedPath))) {
        deletedFiles.push(indexedPath);
      }
    }

    for (const filePath of allFiles) {
      const metadata = indexedFiles.get(path.relative(this.repoPath, filePath));
      if (!metadata) {
        newFiles.push(filePath);
        continue;
      }

      // An unreadable file hashes to '' and therefore counts as modified.
      const currentHash = this.calculateFileHash(filePath);
      if (currentHash !== metadata.hash) {
        modifiedFiles.push(filePath);
      } else {
        unchangedFiles.push(filePath);
      }
    }

    return { newFiles, modifiedFiles, unchangedFiles, deletedFiles };
  }

  /**
   * Parse a file and extract its chunks.
   *
   * @param filePath Path of the file to read.
   * @returns The size-bounded chunks, or `[]` when reading or parsing fails
   *          (the error is logged, not thrown).
   */
  async parseFile(filePath: string): Promise<CodeChunk[]> {
    try {
      const content = fs.readFileSync(filePath, 'utf-8');
      const language = this.detectLanguage(filePath);

      // For now, use simple line-based chunking.
      // In production, use proper AST parsing per language.
      const chunks = this.chunkByStructure(content, filePath, language);

      // Split chunks that are too large (Gemini limit is 36KB, we use 20KB to be safe).
      return this.splitLargeChunks(chunks);
    } catch (error) {
      console.error(`Error parsing ${filePath}:`, error);
      return [];
    }
  }

  /**
   * Split chunks whose content exceeds the size limit.
   *
   * Lines are packed greedily so every emitted part stays within
   * `maxChunkSize` UTF-8 bytes. A single line that is itself over the limit
   * is cut on code-point boundaries; all of its pieces report that line's
   * range. Chunks already within the limit are passed through untouched.
   *
   * @param chunks Chunks to check.
   * @param maxChunkSize Maximum content size in UTF-8 bytes per chunk.
   * @returns Chunks in input order; parts of a split chunk get the id suffix
   *          `_part0`, `_part1`, ... and a recomputed `complexity`.
   *          `startLine`/`endLine` keep the start-inclusive, end-exclusive
   *          convention used by `chunkByStructure`.
   */
  private splitLargeChunks(
    chunks: CodeChunk[],
    maxChunkSize: number = CodeIndexer.DEFAULT_MAX_CHUNK_SIZE
  ): CodeChunk[] {
    const result: CodeChunk[] = [];

    for (const chunk of chunks) {
      if (Buffer.byteLength(chunk.content, 'utf-8') <= maxChunkSize) {
        result.push(chunk);
        continue;
      }

      const lines = chunk.content.split('\n');
      let pending: string[] = [];
      let pendingBytes = 0;
      let pendingStart = 0;
      let partIndex = 0;

      const pushPart = (content: string, startOffset: number, endOffset: number): void => {
        result.push({
          ...chunk,
          id: `${chunk.id}_part${partIndex++}`,
          content,
          startLine: chunk.startLine + startOffset,
          endLine: chunk.startLine + endOffset,
          complexity: this.estimateComplexity(content)
        });
      };

      const flushPending = (endOffset: number): void => {
        if (pending.length === 0) return;
        pushPart(pending.join('\n'), pendingStart, endOffset);
        pending = [];
        pendingBytes = 0;
      };

      lines.forEach((line, offset) => {
        const lineBytes = Buffer.byteLength(line, 'utf-8');

        // A line that cannot fit on its own is cut into byte-bounded pieces.
        if (lineBytes > maxChunkSize) {
          flushPending(offset);
          for (const piece of this.splitLineByBytes(line, maxChunkSize)) {
            pushPart(piece, offset, offset + 1);
          }
          return;
        }

        // The extra byte accounts for the '\n' that joins this line to the part.
        if (pending.length > 0 && pendingBytes + 1 + lineBytes > maxChunkSize) {
          flushPending(offset);
        }

        if (pending.length === 0) {
          pendingStart = offset;
          pendingBytes = lineBytes;
        } else {
          pendingBytes += 1 + lineBytes;
        }
        pending.push(line);
      });

      flushPending(lines.length);
    }

    return result;
  }

  /**
   * Cut one line into consecutive pieces of at most `maxBytes` UTF-8 bytes.
   * Iterates by code point so multi-byte characters are never torn apart; a
   * single character larger than `maxBytes` is emitted as its own piece.
   */
  private splitLineByBytes(line: string, maxBytes: number): string[] {
    const pieces: string[] = [];
    let current = '';
    let currentBytes = 0;

    for (const char of line) {
      const charBytes = Buffer.byteLength(char, 'utf-8');
      if (current !== '' && currentBytes + charBytes > maxBytes) {
        pieces.push(current);
        current = '';
        currentBytes = 0;
      }
      current += char;
      currentBytes += charBytes;
    }

    if (current !== '') {
      pieces.push(current);
    }
    return pieces;
  }

  /**
   * Map a file's extension (case-insensitively) to a language label.
   *
   * @returns The language label, or `'unknown'` for unmapped extensions.
   */
  private detectLanguage(filePath: string): string {
    const ext = path.extname(filePath).toLowerCase();
    return CodeIndexer.LANGUAGE_BY_EXTENSION[ext] ?? 'unknown';
  }

  /**
   * Simple chunking strategy: split by functions/classes.
   * For production, integrate with proper language-specific parsers.
   *
   * A new chunk starts at every line matching the language's function or
   * class pattern; text before the first match becomes its own chunk.
   * Whitespace-only chunks are dropped. Line numbers are 0-based with an
   * exclusive `endLine`.
   *
   * @param content Full text of the file.
   * @param filePath Path of the file; used verbatim in chunk ids and stored
   *                 relative to `repoPath` on each chunk.
   * @param language Language label selecting the detection patterns.
   */
  private chunkByStructure(
    content: string,
    filePath: string,
    language: string
  ): CodeChunk[] {
    const chunks: CodeChunk[] = [];
    const lines = content.split('\n');
    const { functionPattern, classPattern } = this.getPatterns(language);

    // Loop-invariant values, computed once per file.
    const relativePath = path.relative(this.repoPath, filePath);
    const fileImports = this.extractImports(content);

    let currentChunk = '';
    let startLine = 0;
    let chunkId = 0;

    const emit = (endLine: number): void => {
      chunks.push({
        id: `${filePath}:${startLine}:${chunkId++}`,
        content: currentChunk,
        type: 'function',
        name: this.extractName(currentChunk, language),
        filePath: relativePath,
        startLine,
        endLine,
        language,
        // Copy so chunks never share one mutable array.
        imports: [...fileImports],
        complexity: this.estimateComplexity(currentChunk)
      });
    };

    lines.forEach((line, index) => {
      const startsDefinition = functionPattern.test(line) || classPattern.test(line);

      if (startsDefinition && currentChunk.trim()) {
        // Close the previous chunk and start a new one at this line.
        emit(index);
        currentChunk = line;
        startLine = index;
      } else {
        currentChunk += (currentChunk ? '\n' : '') + line;
      }
    });

    // Save the last chunk.
    if (currentChunk.trim()) {
      emit(lines.length);
    }

    return chunks;
  }

  /**
   * Return the line-level regexes that mark the start of a function or a
   * class for `language`, falling back to a generic `function`/`def`/`class`
   * pair for languages without a dedicated entry.
   */
  private getPatterns(language: string) {
    const patterns: Record<string, { functionPattern: RegExp; classPattern: RegExp }> = {
      python: {
        functionPattern: /^\s*def\s+\w+/,
        classPattern: /^\s*class\s+\w+/
      },
      typescript: {
        // Second alternative: `name = (...) =>` / `name = async (...) =>`.
        functionPattern: /^\s*(export\s+)?(async\s+)?function\s+\w+|^\s*\w+\s*=\s*(async\s+)?\(.*?\)\s*=>/,
        classPattern: /^\s*(export\s+)?(class|interface)\s+\w+/
      },
      javascript: {
        functionPattern: /^\s*(export\s+)?(async\s+)?function\s+\w+|^\s*\w+\s*=\s*(async\s+)?\(.*?\)\s*=>/,
        classPattern: /^\s*(export\s+)?class\s+\w+/
      },
      dart: {
        functionPattern: /^\s*(void|Future|String|int|bool|dynamic|var)\s+\w+\s*\(/,
        classPattern: /^\s*class\s+\w+/
      }
    };

    return patterns[language] || {
      functionPattern: /^\s*function\s+\w+|^\s*def\s+\w+/,
      classPattern: /^\s*class\s+\w+/
    };
  }

  /**
   * Extract the declared name from the first five lines of a chunk.
   *
   * Languages without dedicated rules use a generic `function`/`def`/`class`
   * matcher, mirroring the fallback in `getPatterns`.
   *
   * @returns The first matching identifier, or `'anonymous'` if none is found.
   */
  private extractName(chunk: string, language: string): string {
    let matchers: RegExp[];
    switch (language) {
      case 'python':
        matchers = [/^\s*(?:def|class)\s+(\w+)/];
        break;
      case 'typescript':
      case 'javascript':
        matchers = [
          /^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)/,
          /^\s*(?:export\s+)?(?:class|interface)\s+(\w+)/,
          /^\s*(\w+)\s*=\s*(?:async\s+)?\(/
        ];
        break;
      case 'dart':
        matchers = [
          /^\s*(?:void|Future|String|int|bool|dynamic|var)\s+(\w+)\s*\(/,
          /^\s*class\s+(\w+)/
        ];
        break;
      default:
        matchers = [/^\s*(?:function|def|class)\s+(\w+)/];
    }

    // Every matcher is tried on a line before moving to the next line.
    for (const line of chunk.split('\n').slice(0, 5)) {
      for (const matcher of matchers) {
        const match = matcher.exec(line);
        if (match) return match[1];
      }
    }

    return 'anonymous';
  }

  /**
   * Collect import-like lines (`import`, `from`, `require`, `using`) from the
   * first 50 lines of a file.
   *
   * Scanning stops at the first line after an import that is neither an
   * import, a `//` or `#` comment, nor blank.
   * NOTE(review): the continuation lines of a multi-line `import { ... }`
   * trigger that stop, so imports after such a statement are not collected.
   */
  private extractImports(content: string): string[] {
    const imports: string[] = [];

    for (const line of content.split('\n').slice(0, 50)) {
      if (line.match(/^import\s|^from\s|^require\s|^using\s/)) {
        imports.push(line.trim());
      }

      const inImportSection = line.match(/^import|^from|^require|^using|^\/\/|^#|^\s*$/);
      if (imports.length > 0 && !inImportSection) {
        break; // Stop after imports section
      }
    }

    return imports;
  }

  /**
   * Estimate complexity by counting `if (`, `for (` and `while (` occurrences.
   * Starts at 1, adds 1 per `if` and 2 per `for`/`while`, capped at 5.
   */
  private estimateComplexity(chunk: string): number {
    const count = (pattern: RegExp): number => chunk.match(pattern)?.length ?? 0;

    const score =
      1 +
      count(/if\s*\(/g) +
      count(/for\s*\(/g) * 2 +
      count(/while\s*\(/g) * 2;

    return Math.min(score, 5);
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/NgoTaiCo/mcp-codebase-index'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

indexer.ts•10.3 KiB