Skip to main content
Glama

Claude Context

by zilliztech
context.tsโ€ข49.3 kB
import { Splitter, CodeChunk, AstCodeSplitter } from './splitter'; import { Embedding, EmbeddingVector, OpenAIEmbedding } from './embedding'; import { VectorDatabase, VectorDocument, VectorSearchResult, HybridSearchRequest, HybridSearchOptions, HybridSearchResult } from './vectordb'; import { SemanticSearchResult } from './types'; import { envManager } from './utils/env-manager'; import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; import { FileSynchronizer } from './sync/synchronizer'; const DEFAULT_SUPPORTED_EXTENSIONS = [ // Programming languages '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp', '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm', // Text and markup files '.md', '.markdown', '.ipynb', // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm', // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env' ]; const DEFAULT_IGNORE_PATTERNS = [ // Common build output and dependency directories 'node_modules/**', 'dist/**', 'build/**', 'out/**', 'target/**', 'coverage/**', '.nyc_output/**', // IDE and editor files '.vscode/**', '.idea/**', '*.swp', '*.swo', // Version control '.git/**', '.svn/**', '.hg/**', // Cache directories '.cache/**', '__pycache__/**', '.pytest_cache/**', // Logs and temporary files 'logs/**', 'tmp/**', 'temp/**', '*.log', // Environment and config files '.env', '.env.*', '*.local', // Minified and bundled files '*.min.js', '*.min.css', '*.min.map', '*.bundle.js', '*.bundle.css', '*.chunk.js', '*.vendor.js', '*.polyfills.js', '*.runtime.js', '*.map', // source map files 'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out', 'target', '.vscode', '.idea', '__pycache__', '.pytest_cache', 'coverage', '.nyc_output', 'logs', 'tmp', 'temp' ]; export interface ContextConfig { embedding?: Embedding; vectorDatabase?: VectorDatabase; codeSplitter?: Splitter; supportedExtensions?: string[]; ignorePatterns?: string[]; customExtensions?: string[]; // New: custom extensions from MCP customIgnorePatterns?: string[]; // New: custom ignore patterns from MCP } export class Context { private embedding: Embedding; private vectorDatabase: VectorDatabase; private codeSplitter: Splitter; private supportedExtensions: string[]; private ignorePatterns: string[]; private synchronizers = new Map<string, FileSynchronizer>(); constructor(config: ContextConfig = {}) { // Initialize services this.embedding = config.embedding || new OpenAIEmbedding({ apiKey: envManager.get('OPENAI_API_KEY') || 'your-openai-api-key', model: 'text-embedding-3-small', ...(envManager.get('OPENAI_BASE_URL') && { baseURL: envManager.get('OPENAI_BASE_URL') }) }); if (!config.vectorDatabase) { throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.'); } this.vectorDatabase = config.vectorDatabase; this.codeSplitter = config.codeSplitter || new AstCodeSplitter(2500, 300); // Load custom extensions from environment variables const envCustomExtensions = this.getCustomExtensionsFromEnv(); // Combine default extensions with config extensions and env extensions const allSupportedExtensions = [ ...DEFAULT_SUPPORTED_EXTENSIONS, ...(config.supportedExtensions || []), ...(config.customExtensions || []), ...envCustomExtensions ]; // Remove duplicates this.supportedExtensions = [...new Set(allSupportedExtensions)]; // Load custom ignore patterns from environment variables const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv(); // Start with default ignore patterns const allIgnorePatterns = [ ...DEFAULT_IGNORE_PATTERNS, ...(config.ignorePatterns || []), ...(config.customIgnorePatterns || []), ...envCustomIgnorePatterns ]; // Remove duplicates this.ignorePatterns = [...new Set(allIgnorePatterns)]; console.log(`[Context] ๐Ÿ”ง Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`); if (envCustomExtensions.length > 0) { console.log(`[Context] ๐Ÿ“Ž Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`); } if (envCustomIgnorePatterns.length > 0) { console.log(`[Context] ๐Ÿšซ Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`); } } /** * Get embedding instance */ getEmbedding(): Embedding { return this.embedding; } /** * Get vector database instance */ getVectorDatabase(): VectorDatabase { return this.vectorDatabase; } /** * Get code splitter instance */ getCodeSplitter(): Splitter { return this.codeSplitter; } /** * Get supported extensions */ getSupportedExtensions(): string[] { return [...this.supportedExtensions]; } /** * Get ignore patterns */ getIgnorePatterns(): string[] { return [...this.ignorePatterns]; } /** * Get synchronizers map */ getSynchronizers(): Map<string, FileSynchronizer> { return new Map(this.synchronizers); } /** * Set synchronizer for a collection */ setSynchronizer(collectionName: string, synchronizer: FileSynchronizer): void { this.synchronizers.set(collectionName, synchronizer); } /** * Public wrapper for loadIgnorePatterns private method */ async getLoadedIgnorePatterns(codebasePath: string): Promise<void> { return this.loadIgnorePatterns(codebasePath); } /** * Public wrapper for prepareCollection private method */ async getPreparedCollection(codebasePath: string): Promise<void> { return this.prepareCollection(codebasePath); } /** * Get isHybrid setting from environment variable with default true */ private getIsHybrid(): boolean { const isHybridEnv = envManager.get('HYBRID_MODE'); if (isHybridEnv === undefined || isHybridEnv === null) { return true; // Default to true } return isHybridEnv.toLowerCase() === 'true'; } /** * Generate collection name based on codebase path and hybrid mode */ public getCollectionName(codebasePath: string): string { const isHybrid = this.getIsHybrid(); const normalizedPath = path.resolve(codebasePath); const hash = crypto.createHash('md5').update(normalizedPath).digest('hex'); const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks'; return `${prefix}_${hash.substring(0, 8)}`; } /** * Index a codebase for semantic search * @param codebasePath Codebase root path * @param progressCallback Optional progress callback function * @param forceReindex Whether to recreate the collection even if it exists * @returns Indexing statistics */ async indexCodebase( codebasePath: string, progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void, forceReindex: boolean = false ): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> { const isHybrid = this.getIsHybrid(); const searchType = isHybrid === true ? 'hybrid search' : 'semantic search'; console.log(`[Context] ๐Ÿš€ Starting to index codebase with ${searchType}: ${codebasePath}`); // 1. Load ignore patterns from various ignore files await this.loadIgnorePatterns(codebasePath); // 2. Check and prepare vector collection progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 }); console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`); await this.prepareCollection(codebasePath, forceReindex); // 3. Recursively traverse codebase to get all supported files progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 }); const codeFiles = await this.getCodeFiles(codebasePath); console.log(`[Context] ๐Ÿ“ Found ${codeFiles.length} code files`); if (codeFiles.length === 0) { progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 }); return { indexedFiles: 0, totalChunks: 0, status: 'completed' }; } // 3. Process each file with streaming chunk processing // Reserve 10% for preparation, 90% for actual indexing const indexingStartPercentage = 10; const indexingEndPercentage = 100; const indexingRange = indexingEndPercentage - indexingStartPercentage; const result = await this.processFileList( codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => { // Calculate progress percentage const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange; console.log(`[Context] ๐Ÿ“Š Processed ${fileIndex}/${totalFiles} files`); progressCallback?.({ phase: `Processing files (${fileIndex}/${totalFiles})...`, current: fileIndex, total: totalFiles, percentage: Math.round(progressPercentage) }); } ); console.log(`[Context] โœ… Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`); progressCallback?.({ phase: 'Indexing complete!', current: result.processedFiles, total: codeFiles.length, percentage: 100 }); return { indexedFiles: result.processedFiles, totalChunks: result.totalChunks, status: result.status }; } async reindexByChange( codebasePath: string, progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void ): Promise<{ added: number, removed: number, modified: number }> { const collectionName = this.getCollectionName(codebasePath); const synchronizer = this.synchronizers.get(collectionName); if (!synchronizer) { // Load project-specific ignore patterns before creating FileSynchronizer await this.loadIgnorePatterns(codebasePath); // To be safe, let's initialize if it's not there. const newSynchronizer = new FileSynchronizer(codebasePath, this.ignorePatterns); await newSynchronizer.initialize(); this.synchronizers.set(collectionName, newSynchronizer); } const currentSynchronizer = this.synchronizers.get(collectionName)!; progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 }); const { added, removed, modified } = await currentSynchronizer.checkForChanges(); const totalChanges = added.length + removed.length + modified.length; if (totalChanges === 0) { progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 }); console.log('[Context] โœ… No file changes detected.'); return { added: 0, removed: 0, modified: 0 }; } console.log(`[Context] ๐Ÿ”„ Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`); let processedChanges = 0; const updateProgress = (phase: string) => { processedChanges++; const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100); progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage }); }; // Handle removed files for (const file of removed) { await this.deleteFileChunks(collectionName, file); updateProgress(`Removed ${file}`); } // Handle modified files for (const file of modified) { await this.deleteFileChunks(collectionName, file); updateProgress(`Deleted old chunks for ${file}`); } // Handle added and modified files const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f)); if (filesToIndex.length > 0) { await this.processFileList( filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => { updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`); } ); } console.log(`[Context] โœ… Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`); progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 }); return { added: added.length, removed: removed.length, modified: modified.length }; } private async deleteFileChunks(collectionName: string, relativePath: string): Promise<void> { // Escape backslashes for Milvus query expression (Windows path compatibility) const escapedPath = relativePath.replace(/\\/g, '\\\\'); const results = await this.vectorDatabase.query( collectionName, `relativePath == "${escapedPath}"`, ['id'] ); if (results.length > 0) { const ids = results.map(r => r.id as string).filter(id => id); if (ids.length > 0) { await this.vectorDatabase.delete(collectionName, ids); console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`); } } } /** * Semantic search with unified implementation * @param codebasePath Codebase path to search in * @param query Search query * @param topK Number of results to return * @param threshold Similarity threshold */ async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5, filterExpr?: string): Promise<SemanticSearchResult[]> { const isHybrid = this.getIsHybrid(); const searchType = isHybrid === true ? 'hybrid search' : 'semantic search'; console.log(`[Context] ๐Ÿ” Executing ${searchType}: "${query}" in ${codebasePath}`); const collectionName = this.getCollectionName(codebasePath); console.log(`[Context] ๐Ÿ” Using collection: ${collectionName}`); // Check if collection exists and has data const hasCollection = await this.vectorDatabase.hasCollection(collectionName); if (!hasCollection) { console.log(`[Context] โš ๏ธ Collection '${collectionName}' does not exist. Please index the codebase first.`); return []; } if (isHybrid === true) { try { // Check collection stats to see if it has data const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1); console.log(`[Context] ๐Ÿ” Collection '${collectionName}' exists and appears to have data`); } catch (error) { console.log(`[Context] โš ๏ธ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error); } // 1. Generate query vector console.log(`[Context] ๐Ÿ” Generating embeddings for query: "${query}"`); const queryEmbedding: EmbeddingVector = await this.embedding.embed(query); console.log(`[Context] โœ… Generated embedding vector with dimension: ${queryEmbedding.vector.length}`); console.log(`[Context] ๐Ÿ” First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`); // 2. Prepare hybrid search requests const searchRequests: HybridSearchRequest[] = [ { data: queryEmbedding.vector, anns_field: "vector", param: { "nprobe": 10 }, limit: topK }, { data: query, anns_field: "sparse_vector", param: { "drop_ratio_search": 0.2 }, limit: topK } ]; console.log(`[Context] ๐Ÿ” Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`); console.log(`[Context] ๐Ÿ” Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`); // 3. Execute hybrid search console.log(`[Context] ๐Ÿ” Executing hybrid search with RRF reranking...`); const searchResults: HybridSearchResult[] = await this.vectorDatabase.hybridSearch( collectionName, searchRequests, { rerank: { strategy: 'rrf', params: { k: 100 } }, limit: topK, filterExpr } ); console.log(`[Context] ๐Ÿ” Raw search results count: ${searchResults.length}`); // 4. Convert to semantic search result format const results: SemanticSearchResult[] = searchResults.map(result => ({ content: result.document.content, relativePath: result.document.relativePath, startLine: result.document.startLine, endLine: result.document.endLine, language: result.document.metadata.language || 'unknown', score: result.score })); console.log(`[Context] โœ… Found ${results.length} relevant hybrid results`); if (results.length > 0) { console.log(`[Context] ๐Ÿ” Top result score: ${results[0].score}, path: ${results[0].relativePath}`); } return results; } else { // Regular semantic search // 1. Generate query vector const queryEmbedding: EmbeddingVector = await this.embedding.embed(query); // 2. Search in vector database const searchResults: VectorSearchResult[] = await this.vectorDatabase.search( collectionName, queryEmbedding.vector, { topK, threshold, filterExpr } ); // 3. Convert to semantic search result format const results: SemanticSearchResult[] = searchResults.map(result => ({ content: result.document.content, relativePath: result.document.relativePath, startLine: result.document.startLine, endLine: result.document.endLine, language: result.document.metadata.language || 'unknown', score: result.score })); console.log(`[Context] โœ… Found ${results.length} relevant results`); return results; } } /** * Check if index exists for codebase * @param codebasePath Codebase path to check * @returns Whether index exists */ async hasIndex(codebasePath: string): Promise<boolean> { const collectionName = this.getCollectionName(codebasePath); return await this.vectorDatabase.hasCollection(collectionName); } /** * Clear index * @param codebasePath Codebase path to clear index for * @param progressCallback Optional progress callback function */ async clearIndex( codebasePath: string, progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void ): Promise<void> { console.log(`[Context] ๐Ÿงน Cleaning index data for ${codebasePath}...`); progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 }); const collectionName = this.getCollectionName(codebasePath); const collectionExists = await this.vectorDatabase.hasCollection(collectionName); progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 }); if (collectionExists) { await this.vectorDatabase.dropCollection(collectionName); } // Delete snapshot file await FileSynchronizer.deleteSnapshot(codebasePath); progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 }); console.log('[Context] โœ… Index data cleaned'); } /** * Update ignore patterns (merges with default patterns and existing patterns) * @param ignorePatterns Array of ignore patterns to add to defaults */ updateIgnorePatterns(ignorePatterns: string[]): void { // Merge with default patterns and any existing custom patterns, avoiding duplicates const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns]; const uniquePatterns: string[] = []; const patternSet = new Set(mergedPatterns); patternSet.forEach(pattern => uniquePatterns.push(pattern)); this.ignorePatterns = uniquePatterns; console.log(`[Context] ๐Ÿšซ Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`); } /** * Add custom ignore patterns (from MCP or other sources) without replacing existing ones * @param customPatterns Array of custom ignore patterns to add */ addCustomIgnorePatterns(customPatterns: string[]): void { if (customPatterns.length === 0) return; // Merge current patterns with new custom patterns, avoiding duplicates const mergedPatterns = [...this.ignorePatterns, ...customPatterns]; const uniquePatterns: string[] = []; const patternSet = new Set(mergedPatterns); patternSet.forEach(pattern => uniquePatterns.push(pattern)); this.ignorePatterns = uniquePatterns; console.log(`[Context] ๐Ÿšซ Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`); } /** * Reset ignore patterns to defaults only */ resetIgnorePatternsToDefaults(): void { this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS]; console.log(`[Context] ๐Ÿ”„ Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`); } /** * Update embedding instance * @param embedding New embedding instance */ updateEmbedding(embedding: Embedding): void { this.embedding = embedding; console.log(`[Context] ๐Ÿ”„ Updated embedding provider: ${embedding.getProvider()}`); } /** * Update vector database instance * @param vectorDatabase New vector database instance */ updateVectorDatabase(vectorDatabase: VectorDatabase): void { this.vectorDatabase = vectorDatabase; console.log(`[Context] ๐Ÿ”„ Updated vector database`); } /** * Update splitter instance * @param splitter New splitter instance */ updateSplitter(splitter: Splitter): void { this.codeSplitter = splitter; console.log(`[Context] ๐Ÿ”„ Updated splitter instance`); } /** * Prepare vector collection */ private async prepareCollection(codebasePath: string, forceReindex: boolean = false): Promise<void> { const isHybrid = this.getIsHybrid(); const collectionType = isHybrid === true ? 'hybrid vector' : 'vector'; console.log(`[Context] ๐Ÿ”ง Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`); const collectionName = this.getCollectionName(codebasePath); // Check if collection already exists const collectionExists = await this.vectorDatabase.hasCollection(collectionName); if (collectionExists && !forceReindex) { console.log(`๐Ÿ“‹ Collection ${collectionName} already exists, skipping creation`); return; } if (collectionExists && forceReindex) { console.log(`[Context] ๐Ÿ—‘๏ธ Dropping existing collection ${collectionName} for force reindex...`); await this.vectorDatabase.dropCollection(collectionName); console.log(`[Context] โœ… Collection ${collectionName} dropped successfully`); } console.log(`[Context] ๐Ÿ” Detecting embedding dimension for ${this.embedding.getProvider()} provider...`); const dimension = await this.embedding.detectDimension(); console.log(`[Context] ๐Ÿ“ Detected dimension: ${dimension} for ${this.embedding.getProvider()}`); const dirName = path.basename(codebasePath); if (isHybrid === true) { await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`); } else { await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`); } console.log(`[Context] โœ… Collection ${collectionName} created successfully (dimension: ${dimension})`); } /** * Recursively get all code files in the codebase */ private async getCodeFiles(codebasePath: string): Promise<string[]> { const files: string[] = []; const traverseDirectory = async (currentPath: string) => { const entries = await fs.promises.readdir(currentPath, { withFileTypes: true }); for (const entry of entries) { const fullPath = path.join(currentPath, entry.name); // Check if path matches ignore patterns if (this.matchesIgnorePattern(fullPath, codebasePath)) { continue; } if (entry.isDirectory()) { await traverseDirectory(fullPath); } else if (entry.isFile()) { const ext = path.extname(entry.name); if (this.supportedExtensions.includes(ext)) { files.push(fullPath); } } } }; await traverseDirectory(codebasePath); return files; } /** * Process a list of files with streaming chunk processing * @param filePaths Array of file paths to process * @param codebasePath Base path for the codebase * @param onFileProcessed Callback called when each file is processed * @returns Object with processed file count and total chunk count */ private async processFileList( filePaths: string[], codebasePath: string, onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> { const isHybrid = this.getIsHybrid(); const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10)); const CHUNK_LIMIT = 450000; console.log(`[Context] ๐Ÿ”ง Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`); let chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }> = []; let processedFiles = 0; let totalChunks = 0; let limitReached = false; for (let i = 0; i < filePaths.length; i++) { const filePath = filePaths[i]; try { const content = await fs.promises.readFile(filePath, 'utf-8'); const language = this.getLanguageFromExtension(path.extname(filePath)); const chunks = await this.codeSplitter.split(content, language, filePath); // Log files with many chunks or large content if (chunks.length > 50) { console.warn(`[Context] โš ๏ธ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`); } else if (content.length > 100000) { console.log(`๐Ÿ“„ Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`); } // Add chunks to buffer for (const chunk of chunks) { chunkBuffer.push({ chunk, codebasePath }); totalChunks++; // Process batch when buffer reaches EMBEDDING_BATCH_SIZE if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) { try { await this.processChunkBuffer(chunkBuffer); } catch (error) { const searchType = isHybrid === true ? 'hybrid' : 'regular'; console.error(`[Context] โŒ Failed to process chunk batch for ${searchType}:`, error); if (error instanceof Error) { console.error('[Context] Stack trace:', error.stack); } } finally { chunkBuffer = []; // Always clear buffer, even on failure } } // Check if chunk limit is reached if (totalChunks >= CHUNK_LIMIT) { console.warn(`[Context] โš ๏ธ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`); limitReached = true; break; // Exit the inner loop (over chunks) } } processedFiles++; onFileProcessed?.(filePath, i + 1, filePaths.length); if (limitReached) { break; // Exit the outer loop (over files) } } catch (error) { console.warn(`[Context] โš ๏ธ Skipping file ${filePath}: ${error}`); } } // Process any remaining chunks in the buffer if (chunkBuffer.length > 0) { const searchType = isHybrid === true ? 'hybrid' : 'regular'; console.log(`๐Ÿ“ Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`); try { await this.processChunkBuffer(chunkBuffer); } catch (error) { console.error(`[Context] โŒ Failed to process final chunk batch for ${searchType}:`, error); if (error instanceof Error) { console.error('[Context] Stack trace:', error.stack); } } } return { processedFiles, totalChunks, status: limitReached ? 'limit_reached' : 'completed' }; } /** * Process accumulated chunk buffer */ private async processChunkBuffer(chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }>): Promise<void> { if (chunkBuffer.length === 0) return; // Extract chunks and ensure they all have the same codebasePath const chunks = chunkBuffer.map(item => item.chunk); const codebasePath = chunkBuffer[0].codebasePath; // Estimate tokens (rough estimation: 1 token โ‰ˆ 4 characters) const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0); const isHybrid = this.getIsHybrid(); const searchType = isHybrid === true ? 'hybrid' : 'regular'; console.log(`[Context] ๐Ÿ”„ Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`); await this.processChunkBatch(chunks, codebasePath); } /** * Process a batch of chunks */ private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise<void> { const isHybrid = this.getIsHybrid(); // Generate embedding vectors const chunkContents = chunks.map(chunk => chunk.content); const embeddings = await this.embedding.embedBatch(chunkContents); if (isHybrid === true) { // Create hybrid vector documents const documents: VectorDocument[] = chunks.map((chunk, index) => { if (!chunk.metadata.filePath) { throw new Error(`Missing filePath in chunk metadata at index ${index}`); } const relativePath = path.relative(codebasePath, chunk.metadata.filePath); const fileExtension = path.extname(chunk.metadata.filePath); const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata; return { id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content), content: chunk.content, // Full text content for BM25 and storage vector: embeddings[index].vector, // Dense vector relativePath, startLine: chunk.metadata.startLine || 0, endLine: chunk.metadata.endLine || 0, fileExtension, metadata: { ...restMetadata, codebasePath, language: chunk.metadata.language || 'unknown', chunkIndex: index } }; }); // Store to vector database await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents); } else { // Create regular vector documents const documents: VectorDocument[] = chunks.map((chunk, index) => { if (!chunk.metadata.filePath) { throw new Error(`Missing filePath in chunk metadata at index ${index}`); } const relativePath = path.relative(codebasePath, chunk.metadata.filePath); const fileExtension = path.extname(chunk.metadata.filePath); const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata; return { id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content), vector: embeddings[index].vector, content: chunk.content, relativePath, startLine: chunk.metadata.startLine || 0, endLine: chunk.metadata.endLine || 0, fileExtension, metadata: { ...restMetadata, codebasePath, language: chunk.metadata.language || 'unknown', chunkIndex: index } }; }); // Store to vector database await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents); } } /** * Get programming language based on file extension */ private getLanguageFromExtension(ext: string): string { const languageMap: Record<string, string> = { '.ts': 'typescript', '.tsx': 'typescript', '.js': 'javascript', '.jsx': 'javascript', '.py': 'python', '.java': 'java', '.cpp': 'cpp', '.c': 'c', '.h': 'c', '.hpp': 'cpp', '.cs': 'csharp', '.go': 'go', '.rs': 'rust', '.php': 'php', '.rb': 'ruby', '.swift': 'swift', '.kt': 'kotlin', '.scala': 'scala', '.m': 'objective-c', '.mm': 'objective-c', '.ipynb': 'jupyter' }; return languageMap[ext] || 'text'; } /** * Generate unique ID based on chunk content and location * @param relativePath Relative path to the file * @param startLine Start line number * @param endLine End line number * @param content Chunk content * @returns Hash-based unique ID */ private generateId(relativePath: string, startLine: number, endLine: number, content: string): string { const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`; const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex'); return `chunk_${hash.substring(0, 16)}`; } /** * Read ignore patterns from file (e.g., .gitignore) * @param filePath Path to the ignore file * @returns Array of ignore patterns */ static async getIgnorePatternsFromFile(filePath: string): Promise<string[]> { try { const content = await fs.promises.readFile(filePath, 'utf-8'); return content .split('\n') .map(line => line.trim()) .filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments } catch (error) { console.warn(`[Context] โš ๏ธ Could not read ignore file ${filePath}: ${error}`); return []; } } /** * Load ignore patterns from various ignore files in the codebase * This method preserves any existing custom patterns that were added before * @param codebasePath Path to the codebase */ private async loadIgnorePatterns(codebasePath: string): Promise<void> { try { let fileBasedPatterns: string[] = []; // Load all .xxxignore files in codebase directory const ignoreFiles = await this.findIgnoreFiles(codebasePath); for (const ignoreFile of ignoreFiles) { const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile)); fileBasedPatterns.push(...patterns); } // Load global ~/.context/.contextignore const globalIgnorePatterns = await this.loadGlobalIgnoreFile(); fileBasedPatterns.push(...globalIgnorePatterns); // Merge file-based patterns with existing patterns (which may include custom MCP patterns) if (fileBasedPatterns.length > 0) { this.addCustomIgnorePatterns(fileBasedPatterns); console.log(`[Context] ๐Ÿšซ Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`); } else { console.log('๐Ÿ“„ No ignore files found, keeping existing patterns'); } } catch (error) { console.warn(`[Context] โš ๏ธ Failed to load ignore patterns: ${error}`); // Continue with existing patterns on error - don't reset them } } /** * Find all .xxxignore files in the codebase directory * @param codebasePath Path to the codebase * @returns Array of ignore file paths */ private async findIgnoreFiles(codebasePath: string): Promise<string[]> { try { const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true }); const ignoreFiles: string[] = []; for (const entry of entries) { if (entry.isFile() && entry.name.startsWith('.') && entry.name.endsWith('ignore')) { ignoreFiles.push(path.join(codebasePath, entry.name)); } } if (ignoreFiles.length > 0) { console.log(`๐Ÿ“„ Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`); } return ignoreFiles; } catch (error) { console.warn(`[Context] โš ๏ธ Failed to scan for ignore files: ${error}`); return []; } } /** * Load global ignore file from ~/.context/.contextignore * @returns Array of ignore patterns */ private async loadGlobalIgnoreFile(): Promise<string[]> { try { const homeDir = require('os').homedir(); const globalIgnorePath = path.join(homeDir, '.context', '.contextignore'); return await this.loadIgnoreFile(globalIgnorePath, 'global .contextignore'); } catch (error) { // Global ignore file is optional, don't log warnings return []; } } /** * Load ignore patterns from a specific ignore file * @param filePath Path to the ignore file * @param fileName Display name for logging * @returns Array of ignore patterns */ private async loadIgnoreFile(filePath: string, fileName: string): Promise<string[]> { try { await fs.promises.access(filePath); console.log(`๐Ÿ“„ Found ${fileName} file at: ${filePath}`); const ignorePatterns = await Context.getIgnorePatternsFromFile(filePath); if (ignorePatterns.length > 0) { console.log(`[Context] ๐Ÿšซ Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`); return ignorePatterns; } else { console.log(`๐Ÿ“„ ${fileName} file found but no valid patterns detected`); return []; } } catch (error) { if (fileName.includes('global')) { console.log(`๐Ÿ“„ No ${fileName} file found`); } return []; } } /** * Check if a path matches any ignore pattern * @param filePath Path to check * @param basePath Base path for relative pattern matching * @returns True if path should be ignored */ private matchesIgnorePattern(filePath: string, basePath: string): boolean { if (this.ignorePatterns.length === 0) { return false; } const relativePath = path.relative(basePath, filePath); const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators for (const pattern of this.ignorePatterns) { if (this.isPatternMatch(normalizedPath, pattern)) { return true; } } return false; } /** * Simple glob pattern matching * @param filePath File path to test * @param pattern Glob pattern * @returns True if pattern matches */ private isPatternMatch(filePath: string, pattern: string): boolean { // Handle directory patterns (ending with /) if (pattern.endsWith('/')) { const dirPattern = pattern.slice(0, -1); const pathParts = filePath.split('/'); return pathParts.some(part => this.simpleGlobMatch(part, dirPattern)); } // Handle file patterns if (pattern.includes('/')) { // Pattern with path separator - match exact path return this.simpleGlobMatch(filePath, pattern); } else { // Pattern without path separator - match filename in any directory const fileName = path.basename(filePath); return this.simpleGlobMatch(fileName, pattern); } } /** * Simple glob matching supporting * wildcard * @param text Text to test * @param pattern Pattern with * wildcards * @returns True if pattern matches */ private simpleGlobMatch(text: string, pattern: string): boolean { // Convert glob pattern to regex const regexPattern = pattern .replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except * .replace(/\*/g, '.*'); // Convert * to .* const regex = new RegExp(`^${regexPattern}$`); return regex.test(text); } /** * Get custom extensions from environment variables * Supports CUSTOM_EXTENSIONS as comma-separated list * @returns Array of custom extensions */ private getCustomExtensionsFromEnv(): string[] { const envExtensions = envManager.get('CUSTOM_EXTENSIONS'); if (!envExtensions) { return []; } try { const extensions = envExtensions .split(',') .map(ext => ext.trim()) .filter(ext => ext.length > 0) .map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot return extensions; } catch (error) { console.warn(`[Context] โš ๏ธ Failed to parse CUSTOM_EXTENSIONS: ${error}`); return []; } } /** * Get custom ignore patterns from environment variables * Supports CUSTOM_IGNORE_PATTERNS as comma-separated list * @returns Array of custom ignore patterns */ private getCustomIgnorePatternsFromEnv(): string[] { const envIgnorePatterns = envManager.get('CUSTOM_IGNORE_PATTERNS'); if (!envIgnorePatterns) { return []; } try { const patterns = envIgnorePatterns .split(',') .map(pattern => pattern.trim()) .filter(pattern => pattern.length > 0); return patterns; } catch (error) { console.warn(`[Context] โš ๏ธ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`); return []; } } /** * Add custom extensions (from MCP or other sources) without replacing existing ones * @param customExtensions Array of custom extensions to add */ addCustomExtensions(customExtensions: string[]): void { if (customExtensions.length === 0) return; // Ensure extensions start with dot const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}` ); // Merge current extensions with new custom extensions, avoiding duplicates const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions]; const uniqueExtensions: string[] = [...new Set(mergedExtensions)]; this.supportedExtensions = uniqueExtensions; console.log(`[Context] ๐Ÿ“Ž Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`); } /** * Get current splitter information */ getSplitterInfo(): { type: string; hasBuiltinFallback: boolean; supportedLanguages?: string[] } { const splitterName = this.codeSplitter.constructor.name; if (splitterName === 'AstCodeSplitter') { const { AstCodeSplitter } = require('./splitter/ast-splitter'); return { type: 'ast', hasBuiltinFallback: true, supportedLanguages: AstCodeSplitter.getSupportedLanguages() }; } else { return { type: 'langchain', hasBuiltinFallback: false }; } } /** * Check if current splitter supports a specific language * @param language Programming language */ isLanguageSupported(language: string): boolean { const splitterName = this.codeSplitter.constructor.name; if (splitterName === 'AstCodeSplitter') { const { AstCodeSplitter } = require('./splitter/ast-splitter'); return AstCodeSplitter.isLanguageSupported(language); } // LangChain splitter supports most languages return true; } /** * Get which strategy would be used for a specific language * @param language Programming language */ getSplitterStrategyForLanguage(language: string): { strategy: 'ast' | 'langchain'; reason: string } { const splitterName = this.codeSplitter.constructor.name; if (splitterName === 'AstCodeSplitter') { const { AstCodeSplitter } = require('./splitter/ast-splitter'); const isSupported = AstCodeSplitter.isLanguageSupported(language); return { strategy: isSupported ? 'ast' : 'langchain', reason: isSupported ? 'Language supported by AST parser' : 'Language not supported by AST, will fallback to LangChain' }; } else { return { strategy: 'langchain', reason: 'Using LangChain splitter directly' }; } } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/zilliztech/claude-context'

If you have feedback or need assistance with the MCP directory API, please join our Discord server