Skip to main content
Glama
syncCode.ts (19.1 kB)
import { join, basename } from 'path';
import { readFileSync, existsSync, statSync, readdirSync } from 'fs';
import { glob } from 'glob';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { SyncCodeSchema, type CodeChunk, type MemoryDocument } from '../types/memory-v5.js';
import { getMemoryCollection, getDb } from '../db/connection.js';
import { chunkFile } from '../utils/codeChunking.js';
import { generateCodeEmbeddings } from '../embeddings/codeEmbeddings.js';
import { generateEmbedding } from '../embeddings/voyage-v5.js';
import { logger } from '../utils/logger.js';
import { resolveProjectPath } from '../utils/sessionContext.js';
import { detectFramework } from '../utils/frameworkDetection.js';
import type { Collection } from 'mongodb';

/**
 * Renders a tree-style listing of `projectPath` for the auto-generated codebaseMap.
 *
 * Entries whose name starts with `.` and common dependency/build directories are
 * omitted. Directories that cannot be read contribute nothing (best effort).
 *
 * @param projectPath - Directory to list.
 * @param indent - Prefix carried down from parent levels (used by the recursion).
 * @param maxDepth - Number of directory levels to render.
 * @param currentDepth - Depth of `projectPath` relative to the root call.
 * @returns The rendered tree, one entry per line, or `''` when nothing is visible.
 */
function generateDirectoryStructure(projectPath: string, indent = '', maxDepth = 3, currentDepth = 0): string {
  if (currentDepth >= maxDepth) return '';

  const ignoredNames = ['node_modules', 'dist', 'build', '__pycache__', 'vendor'];
  let structure = '';

  try {
    const entries = readdirSync(projectPath, { withFileTypes: true });
    const filtered = entries.filter(
      (e) => !e.name.startsWith('.') && !ignoredNames.includes(e.name)
    );

    filtered.forEach((entry, index) => {
      const isLast = index === filtered.length - 1;
      const prefix = isLast ? '└── ' : '├── ';
      // Continuation indent is 4 columns wide so children line up under the 4-column prefix.
      const childIndent = indent + (isLast ? '    ' : '│   ');

      if (entry.isDirectory()) {
        structure += `${indent}${prefix}${entry.name}/\n`;
        structure += generateDirectoryStructure(
          join(projectPath, entry.name),
          childIndent,
          maxDepth,
          currentDepth + 1
        );
      } else {
        structure += `${indent}${prefix}${entry.name}\n`;
      }
    });
  } catch {
    // Deliberate best effort: unreadable directories (e.g. permission errors) are skipped.
  }

  return structure;
}

/** Extracts a `code` property from an unknown thrown value, if it has one. */
function getErrorCode(error: unknown): unknown {
  return typeof error === 'object' && error !== null && 'code' in error
    ? (error as { code?: unknown }).code
    : undefined;
}

/** Numbers fed into the human-readable sync report. */
interface SyncReportInput {
  totalTime: number;
  avgSpeed: number;
  processedFiles: number;
  skippedFiles: number;
  totalChunks: number;
  patternStats: Map<string, number>;
  errors: string[];
}

/** Builds the text returned to the caller after a sync run. */
function buildSyncReport(report: SyncReportInput): string {
  const { totalTime, avgSpeed, processedFiles, skippedFiles, totalChunks, patternStats, errors } = report;

  let response = `⚡ CODE INTELLIGENCE ACTIVATED! Your code is now SEARCHABLE! 🧠\n\n`;
  response += `📊 SYNC PERFORMANCE METRICS:\n`;
  response += `• ⏱️ Total Time: ${totalTime} seconds${totalTime < 5 ? ' (LIGHTNING FAST! ⚡)' : totalTime < 30 ? ' (Quick sync ✨)' : ' (Large codebase processed)'}\n`;
  response += `• 📁 Files Processed: ${processedFiles} files${processedFiles > 100 ? ' (MASSIVE codebase!)' : processedFiles > 50 ? ' (Substantial project)' : ''}\n`;
  response += `• ⏭️ Files Skipped: ${skippedFiles} (already up-to-date)\n`;
  response += `• 🧩 Chunks Created: ${totalChunks} searchable units\n`;
  response += `• 🚀 Processing Speed: ${avgSpeed} files/second\n`;
  response += `• 💾 Embeddings: ${totalChunks} vectors stored in MongoDB\n\n`;

  if (patternStats.size > 0) {
    response += `🔍 DISCOVERED PATTERNS (use these in searches!):\n`;
    const sortedPatterns = Array.from(patternStats.entries()).sort((a, b) => b[1] - a[1]);
    for (const [pattern, count] of sortedPatterns.slice(0, 10)) {
      const emoji = count > 50 ? '🔥' : count > 20 ? '⭐' : '•';
      response += `${emoji} ${pattern}: ${count} occurrences\n`;
    }
    response += '\n';
  }

  if (errors.length > 0) {
    response += `⚠️ SYNC WARNINGS (${errors.length} files had issues):\n`;
    response += errors.slice(0, 3).map((e) => `• ${e}`).join('\n');
    if (errors.length > 3) {
      response += `\n• ... and ${errors.length - 3} more (non-critical)\n`;
    }
    response += '\n';
  }

  response += `🎯 YOUR CODE IS NOW SEARCHABLE! Try these POWER SEARCHES:\n\n`;
  response += `1️⃣ **Find Similar Code** (semantic search):\n`;
  response += ` memory_engineering_search --query "authentication" --codeSearch "similar"\n`;
  response += ` → Finds ALL auth-related code, even without exact matches!\n\n`;
  response += `2️⃣ **Find Implementations** (where things are built):\n`;
  response += ` memory_engineering_search --query "UserService" --codeSearch "implements"\n`;
  response += ` → Locates where classes/functions are defined\n\n`;
  response += `3️⃣ **Find Usage** (where things are used):\n`;
  response += ` memory_engineering_search --query "generateToken" --codeSearch "uses"\n`;
  response += ` → Discovers all places using this function\n\n`;
  response += `4️⃣ **Find Patterns** (architectural search):\n`;
  response += ` memory_engineering_search --query "Repository" --codeSearch "pattern"\n`;
  response += ` → Identifies design pattern implementations\n\n`;
  response += `⚡ SYNC INTELLIGENCE:\n`;
  response += `• Next sync needed: ${skippedFiles > processedFiles ? 'Not soon (most files unchanged)' : 'After 10-15 file edits'}\n`;
  response += `• Auto-sync triggers: File changes, >24h gap, before searches\n`;
  response += `• Optimization: ${processedFiles === 0 ? '✅ Everything was already synced!' : totalChunks > 500 ? '💡 Large codebase - consider targeted syncs with patterns' : '✅ Optimal chunk size'}\n\n`;
  response += `🔥 REMEMBER: Fresh embeddings = Perfect search. Stale embeddings = Blind search!\n\n`;
  response += `💡 PRO TIP: ${totalChunks > 1000 ? 'With ' + totalChunks + ' chunks, use specific searches for speed!' : totalChunks < 50 ? 'Small codebase synced - search will be instant!' : 'Perfect size for fast, accurate searches!'}`;

  return response;
}

/**
 * MCP tool entry point: chunks the project's source files, embeds the chunks and
 * stores them in the `memory_engineering_code` collection, then refreshes the
 * statistics section of the project's `codebaseMap` memory.
 *
 * Files whose stored chunks are at least as new as the file's mtime are skipped
 * unless `forceRegenerate` is set. Per-file and per-batch failures are collected
 * and reported as warnings instead of aborting the run.
 *
 * @param args - Raw tool arguments; validated with `SyncCodeSchema`.
 * @returns A text report on success, a text hint when the project is not
 *   initialised, or an `isError` result when the sync fails as a whole.
 */
export async function syncCodeTool(args: unknown): Promise<CallToolResult> {
  try {
    const params = SyncCodeSchema.parse(args);
    const projectPath = resolveProjectPath(params.projectPath);

    // Detect framework and use smart defaults
    const framework = detectFramework(projectPath);

    // Use provided patterns or framework-specific patterns
    const patterns = params.patterns.length > 0 ? params.patterns : framework.patterns;

    // Use provided minChunkSize or framework-specific default
    // (`||` kept on purpose: a falsy/0 value falls back to the framework default).
    const minChunkSize = params.minChunkSize || framework.minChunkSize;

    // NOTE(review): framework.excludes is logged here but never passed to glob's
    // `ignore` below — confirm whether it should be applied and in which format.
    logger.info(`🏗️ FRAMEWORK: ${framework.displayName} detected - Using optimized settings`, {
      patterns,
      minChunkSize,
      excludes: framework.excludes
    });

    // Read project config
    const configPath = join(projectPath, '.memory-engineering', 'config.json');
    if (!existsSync(configPath)) {
      return {
        content: [
          {
            type: 'text',
            text: '🔴 FATAL: No brain to store embeddings! INIT REQUIRED!\n\n⚡ RUN NOW: memory_engineering_init\n\nCode sync impossible without memory system!',
          },
        ],
      };
    }

    const config = JSON.parse(readFileSync(configPath, 'utf-8'));
    if (!config || !config.projectId) {
      throw new Error(`Invalid project config (missing projectId): ${configPath}`);
    }

    const memoryCollection = getMemoryCollection();
    const codeCollection = getDb().collection<CodeChunk>('memory_engineering_code');

    // Get or create codebaseMap memory
    let codebaseMap = await memoryCollection.findOne({
      projectId: config.projectId,
      memoryName: 'codebaseMap'
    });

    if (!codebaseMap) {
      // Auto-create codebaseMap with smart content
      logger.info('🗺️ AUTO-CREATING CODEBASE MAP - Building your GPS system...');

      const dirStructure = generateDirectoryStructure(projectPath);
      const projectName = basename(projectPath);

      const codebaseMapContent = `# Codebase Map - ${projectName}

## Directory Structure
\`\`\`
${dirStructure || 'No visible directories found'}
\`\`\`

## Key Files
(Will be auto-detected during sync)

## Module Organization
(Will be analyzed during sync)

## Code Embedding Statistics
(Will be auto-updated by sync_code)`;

      // Generate embedding for the codebaseMap
      const embedding = await generateEmbedding(codebaseMapContent);

      // Create the memory document
      const newCodebaseMap: MemoryDocument = {
        projectId: config.projectId,
        memoryName: 'codebaseMap',
        content: codebaseMapContent,
        contentVector: embedding,
        metadata: {
          version: 1,
          lastModified: new Date(),
          accessCount: 0
        },
        createdAt: new Date(),
        updatedAt: new Date()
      };

      // Insert into database
      const result = await memoryCollection.insertOne(newCodebaseMap);
      logger.info('✅ CODEBASE MAP CREATED - Your navigation system is ready!');

      // Fetch the inserted document with _id
      codebaseMap = await memoryCollection.findOne({ _id: result.insertedId });
    }

    // findOne can still return null (e.g. the document vanished between insert and read).
    if (!codebaseMap) {
      throw new Error('codebaseMap memory could not be loaded or created');
    }
    const codebaseMapId = codebaseMap._id;
    const existingMapContent = codebaseMap.content;

    // Create indexes for code collection if needed
    await ensureCodeIndexes(codeCollection);

    // Find all code files matching patterns
    logger.info(`🎯 SCAN PATTERNS LOCKED: ${patterns.join(', ')}`);
    const files = await glob(patterns, {
      cwd: projectPath,
      absolute: true,
      ignore: [
        '**/node_modules/**',
        '**/dist/**',
        '**/build/**',
        '**/.git/**',
        '**/coverage/**',
        ...(params.includeTests ? [] : ['**/*.test.*', '**/*.spec.*'])
      ]
    });

    logger.info(`🔥 FOUND ${files.length} CODE FILES - Preparing to absorb knowledge!`);

    // Process files with progress tracking
    let totalChunks = 0;
    let processedFiles = 0;
    let skippedFiles = 0;
    // Language tracking removed to fix Voyage AI issues
    const patternStats = new Map<string, number>();
    const errors: string[] = [];

    // Progress tracking
    const startTime = Date.now();
    let lastProgressReport = 0;

    // Process in batches to avoid memory issues
    const BATCH_SIZE = 10;
    const totalBatches = Math.ceil(files.length / BATCH_SIZE);

    for (let i = 0; i < files.length; i += BATCH_SIZE) {
      const batch = files.slice(i, i + BATCH_SIZE);
      const batchChunks: CodeChunk[] = [];

      // Progress reporting every 10 batches or 30 seconds
      const currentTime = Date.now();
      const batchNumber = Math.floor(i / BATCH_SIZE) + 1;

      if (batchNumber % 10 === 0 || currentTime - lastProgressReport > 30000) {
        const elapsed = Math.round((currentTime - startTime) / 1000);
        const progress = Math.round((i / files.length) * 100);
        logger.info(`⚡ EMBEDDING PROGRESS: ${progress}% | Batch ${batchNumber}/${totalBatches} | Files: ${processedFiles + skippedFiles}/${files.length} | Chunks: ${totalChunks} | Time: ${elapsed}s`);
        lastProgressReport = currentTime;
      }

      for (const file of batch) {
        try {
          const stats = statSync(file);
          const lastModified = stats.mtime;

          // Check if file needs updating
          if (!params.forceRegenerate) {
            const existingChunks = await codeCollection.findOne({
              filePath: file,
              lastModified: { $gte: lastModified }
            });

            if (existingChunks) {
              skippedFiles++;
              continue;
            }
          }

          // Remove old chunks for this file
          await codeCollection.deleteMany({ filePath: file });

          // Chunk the file
          const chunks = await chunkFile(file, config.projectId, codebaseMapId.toString());

          // Filter by minimum size, using the resolved value (explicit param or
          // framework default) rather than the raw, possibly-unset parameter.
          const validChunks = chunks.filter((c) => c.metadata.size >= minChunkSize);

          if (validChunks.length > 0) {
            // Language tracking removed to fix Voyage AI issues
            // Track pattern stats
            for (const chunk of validChunks) {
              for (const pattern of chunk.metadata.patterns) {
                patternStats.set(pattern, (patternStats.get(pattern) || 0) + 1);
              }
            }

            // chunkFile's element type is not visible here; the cast is kept from the original.
            batchChunks.push(...(validChunks as any));
            processedFiles++;
          }
        } catch (error) {
          logger.error(`❌ CHUNK EXTRACTION FAILED: ${file}`, error);
          errors.push(`${file}: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
      }

      // Generate embeddings for this batch
      if (batchChunks.length > 0) {
        try {
          logger.info(`🧠 GENERATING EMBEDDINGS: ${batchChunks.length} chunks from ${batch.length} files`);

          // Generate contextualized embeddings
          const embeddings = await generateCodeEmbeddings(batchChunks);
          logger.info(`🎆 EMBEDDINGS CREATED: ${embeddings.length} vectors ready!`);

          // Debug: Check if embeddings are actually valid
          const validEmbeddingCount = embeddings.filter((e) => e && e.length > 0).length;
          const emptyEmbeddingCount = embeddings.filter((e) => !e || e.length === 0).length;
          logger.info(`🎯 EMBEDDING VALIDATION: ${validEmbeddingCount} valid | ${emptyEmbeddingCount} empty`);

          // Add embeddings and timestamps to chunks
          logger.info(`💾 STORING IN MONGODB: ${batchChunks.length} chunks with ${embeddings.length} embeddings`);

          const chunksWithEmbeddings = batchChunks
            .map((chunk, idx) => {
              const embedding = embeddings[idx];
              if (!embedding || embedding.length === 0) {
                logger.error(`💀 EMBEDDING VOID: Chunk ${idx} [${chunk.chunk.name || 'unnamed'}] has ${embedding ? 'empty array' : 'NO'} embedding!`);
                return null;
              }
              logger.debug(`🌟 PERFECT EMBEDDING: Chunk ${idx} [${chunk.chunk.name || 'unnamed'}] - ${embedding.length} dimensions`);

              return {
                ...chunk,
                contentVector: embedding,
                createdAt: new Date(),
                updatedAt: new Date()
              };
            })
            .filter((chunk): chunk is CodeChunk & { contentVector: number[] } =>
              chunk !== null &&
              chunk.contentVector !== undefined &&
              chunk.contentVector.length > 0
            );

          logger.info(`📊 Result: ${chunksWithEmbeddings.length} chunks have valid embeddings out of ${batchChunks.length} processed`);

          // Insert into database
          if (chunksWithEmbeddings.length > 0) {
            logger.info(`🚀 LAUNCHING TO MONGODB: ${chunksWithEmbeddings.length} chunks...`);
            try {
              const result = await codeCollection.insertMany(chunksWithEmbeddings);
              logger.info(`🎉 SUCCESS! ${result.insertedCount} chunks now searchable in MongoDB!`);
            } catch (dbError) {
              logger.error(`💥 MONGODB EXPLOSION! Insertion failed:`, dbError);
              throw dbError;
            }
          } else {
            logger.error(`🔴 CRITICAL: Zero valid embeddings! Nothing to store!`);
          }

          totalChunks += chunksWithEmbeddings.length;
        } catch (error) {
          logger.error('💣 EMBEDDING GENERATION EXPLODED:', error);
          errors.push(`Embedding generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
      }
    }

    // Update codebaseMap with statistics
    const updatedContent = await updateCodebaseMapWithStats(
      existingMapContent,
      {
        totalFiles: processedFiles + skippedFiles,
        processedFiles,
        skippedFiles,
        totalChunks,
        // languages removed to fix Voyage AI issues
        patterns: Object.fromEntries(patternStats),
        lastSync: new Date().toISOString()
      }
    );

    await memoryCollection.updateOne(
      { _id: codebaseMapId },
      {
        $set: {
          content: updatedContent,
          updatedAt: new Date(),
          'metadata.lastModified': new Date()
        }
      }
    );

    // Build response with timing information. Speed is derived from milliseconds so a
    // sub-second sync (totalTime rounds to 0) cannot divide by zero.
    const elapsedMs = Date.now() - startTime;
    const totalTime = Math.round(elapsedMs / 1000);
    const avgSpeed = processedFiles > 0 && elapsedMs > 0
      ? Math.round((processedFiles / (elapsedMs / 1000)) * 10) / 10
      : 0;

    const response = buildSyncReport({
      totalTime,
      avgSpeed,
      processedFiles,
      skippedFiles,
      totalChunks,
      patternStats,
      errors
    });

    return {
      content: [
        {
          type: 'text',
          text: response,
        },
      ],
    };
  } catch (error) {
    logger.error('💀 SYNC CATASTROPHE - Code intelligence failed!', error);
    return {
      isError: true,
      content: [
        {
          type: 'text',
          text: `Code sync failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
        },
      ],
    };
  }
}

/**
 * Creates the indexes used by code search on `collection`, skipping any that are
 * already present (matched by key specification or by name).
 *
 * @param collection - The code-chunk collection to index.
 * @throws Re-throws any index creation error other than an options conflict (code 85).
 */
async function ensureCodeIndexes(collection: Collection<CodeChunk>): Promise<void> {
  // Get existing indexes to avoid conflicts. listIndexes rejects with
  // NamespaceNotFound (code 26) when the collection has not been created yet;
  // that simply means there are no indexes.
  let existingIndexes: Array<{ key?: unknown; name?: string }> = [];
  try {
    existingIndexes = await collection.listIndexes().toArray();
  } catch (error: unknown) {
    if (getErrorCode(error) !== 26) {
      throw error;
    }
  }

  const indexesToCreate = [
    { keys: { projectId: 1, filePath: 1 }, name: 'project_file' },
    { keys: { projectId: 1, 'chunk.type': 1 }, name: 'project_chunk_type' },
    { keys: { projectId: 1, 'metadata.patterns': 1 }, name: 'project_patterns' },
    { keys: { projectId: 1, language: 1 }, name: 'project_language' },
    { keys: { projectId: 1, lastModified: -1 }, name: 'project_lastmodified' },
    {
      keys: { 'chunk.name': 'text', 'chunk.signature': 'text', searchableText: 'text' },
      name: 'code_text'
    }
  ];

  for (const indexDef of indexesToCreate) {
    try {
      // Check if index with same keys already exists. Text indexes are reported by the
      // server with a rewritten key ({ _fts, _ftsx }), so the name is compared as well.
      const keyString = JSON.stringify(indexDef.keys);
      const exists = existingIndexes.some(
        (idx) => JSON.stringify(idx.key) === keyString || idx.name === indexDef.name
      );

      if (!exists) {
        await collection.createIndex(indexDef.keys as any, { name: indexDef.name });
        logger.info(`🎯 INDEX CREATED: ${indexDef.name} - Search speed boosted!`);
      } else {
        logger.info(`♾️ INDEX EXISTS: ${keyString} - Already optimized!`);
      }
    } catch (error: unknown) {
      if (getErrorCode(error) === 85) {
        logger.info(`⏭️ DUPLICATE INDEX: ${indexDef.name} - Skipping creation`);
      } else {
        logger.error(`🔴 INDEX CREATION FAILED: ${indexDef.name}`, error);
        throw error;
      }
    }
  }
}

/**
 * Replaces (or appends) the "Code Embedding Statistics" section of a codebaseMap
 * markdown document.
 *
 * @param content - Current codebaseMap markdown.
 * @param stats - Counters and pattern frequencies from the sync run.
 * @returns The markdown with exactly one, freshly generated statistics section at the end.
 */
async function updateCodebaseMapWithStats(
  content: string,
  stats: {
    totalFiles: number;
    processedFiles: number;
    skippedFiles: number;
    totalChunks: number;
    // languages removed to fix Voyage AI issues
    patterns: Record<string, number>;
    lastSync: string;
  }
): Promise<string> {
  // Remove old stats section if exists. The section ends at the next level-2 heading
  // at the start of a line, or at end of input; a bare `##` lookahead would stop at
  // the section's own `###` sub-headings and leave them behind on every re-sync.
  const statsRegex = /## Code Embedding Statistics[\s\S]*?(?=^## |(?![\s\S]))/m;
  const updatedContent = content.replace(statsRegex, '');

  const commonPatterns = Object.entries(stats.patterns)
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10)
    .map(([pattern, count]) => `- ${pattern}: ${count} occurrences`)
    .join('\n');

  // Add new stats section
  const statsSection = `## Code Embedding Statistics
Last Sync: ${stats.lastSync}
Total Files: ${stats.totalFiles}
Processed: ${stats.processedFiles}
Skipped: ${stats.skippedFiles}
Total Chunks: ${stats.totalChunks}

### File Types
(Language tracking temporarily disabled)

### Common Patterns
${commonPatterns}
`;

  return updatedContent.trim() + '\n\n' + statsSection;
}

// Export syncCode function for direct use (not through MCP)
export const syncCode = syncCodeTool;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/romiluz13/memory-engineering-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.