Skip to main content
Glama

Ultra MCP

indexer.ts8.43 kB
import { readFile, stat, appendFile } from 'fs/promises'; import { existsSync } from 'fs'; import { join, relative } from 'path'; import { createHash } from 'crypto'; import fg from 'fast-glob'; import ignore from 'ignore'; import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; import { getVectorDB, float32ArrayToBuffer } from './db'; import { EmbeddingProvider } from '../providers/embeddings'; import { logger } from '../utils/logger'; import { VectorConfig } from '../config/schema'; export interface IndexingOptions { projectPath: string; provider: EmbeddingProvider; config: VectorConfig; force?: boolean; onProgress?: (message: string) => void; } export interface IndexingResult { filesIndexed: number; chunksCreated: number; timeMs: number; } export async function indexProject(options: IndexingOptions): Promise<IndexingResult> { const startTime = Date.now(); const { projectPath, provider, config, force = false, onProgress } = options; onProgress?.('Initializing vector database...'); const { db, client } = await getVectorDB(projectPath); // Update .gitignore if needed await updateGitignore(projectPath); // Get files to index onProgress?.('Scanning project files...'); const files = await getFilesToIndex(projectPath, config.filePatterns); if (files.length === 0) { logger.warn('No files found to index'); return { filesIndexed: 0, chunksCreated: 0, timeMs: Date.now() - startTime }; } onProgress?.(`Found ${files.length} files to process`); // Create text splitter const splitter = new RecursiveCharacterTextSplitter({ chunkSize: config.chunkSize, chunkOverlap: config.chunkOverlap, }); let filesIndexed = 0; let chunksCreated = 0; // Process files in batches for (let i = 0; i < files.length; i += config.batchSize) { const batch = files.slice(i, i + config.batchSize); const batchChunks: Array<{ id: string; relpath: string; chunk: string; hash: string; mtimeMs: number; embedding?: number[]; }> = []; // Process batch for (const filePath of batch) { try { 
const relPath = relative(projectPath, filePath); const stats = await stat(filePath); const content = await readFile(filePath, 'utf-8'); // Skip empty files if (!content.trim()) continue; // Split into chunks const chunks = await splitter.splitText(content); for (let idx = 0; idx < chunks.length; idx++) { const chunk = chunks[idx]; const id = `${relPath}#${idx}`; const hash = createHash('sha256').update(chunk).digest('hex'); // Check if chunk already exists with same hash if (!force) { const result = await client.execute({ sql: `SELECT hash, mtime_ms FROM vector_chunks WHERE id = ?`, args: [id] }); if (result.rows.length > 0) { const row = result.rows[0]; const existing = { hash: row[0] as string, mtime_ms: row[1] as number }; if (existing.hash === hash && existing.mtime_ms === stats.mtimeMs) { continue; // Skip unchanged chunk } } } batchChunks.push({ id, relpath: relPath, chunk, hash, mtimeMs: stats.mtimeMs, }); } filesIndexed++; } catch (error) { logger.error(`Error processing file ${filePath}:`, error); } } // Generate embeddings for batch if (batchChunks.length > 0) { onProgress?.(`Generating embeddings for batch ${Math.floor(i / config.batchSize) + 1}...`); try { const texts = batchChunks.map(c => c.chunk); const embeddings = await provider.getEmbeddings(texts); // Store chunks with embeddings (dual-table approach) for (let j = 0; j < batchChunks.length; j++) { const chunk = batchChunks[j]; const embedding = embeddings[j]; // Validate embedding dimensions if (embedding.length !== 1536) { logger.warn(`Embedding dimension mismatch for ${chunk.id}: expected 1536, got ${embedding.length}`); continue; } try { // 1. Insert/update metadata in main table const result = await client.execute({ sql: `INSERT OR REPLACE INTO vector_chunks (id, relpath, chunk, hash, mtime_ms) VALUES (?, ?, ?, ?, ?)`, args: [ chunk.id, chunk.relpath, chunk.chunk, chunk.hash, chunk.mtimeMs, ] }); // 2. 
Get the rowid for linking const rowidResult = await client.execute({ sql: `SELECT rowid FROM vector_chunks WHERE id = ?`, args: [chunk.id] }); if (rowidResult.rows.length > 0) { const rowid = rowidResult.rows[0][0] as number; // 3. Try to insert into VSS virtual table try { // VSS requires DELETE before INSERT for updates await client.execute({ sql: `DELETE FROM vss_vectors WHERE rowid = ?`, args: [rowid] }); await client.execute({ sql: `INSERT INTO vss_vectors (rowid, embedding) VALUES (?, ?)`, args: [rowid, new Float32Array(embedding).buffer] }); } catch (vssError) { // VSS not available, fallback to adding embedding to main table await client.execute({ sql: `UPDATE vector_chunks SET embedding = ? WHERE id = ?`, args: [float32ArrayToBuffer(embedding), chunk.id] }); } chunksCreated++; } } catch (error) { logger.error(`Error storing chunk ${chunk.id}:`, error); } } } catch (error) { logger.error('Error generating embeddings:', error); throw error; } } onProgress?.(`Processed ${Math.min(i + config.batchSize, files.length)} / ${files.length} files`); } const timeMs = Date.now() - startTime; onProgress?.(`Indexing complete: ${filesIndexed} files, ${chunksCreated} chunks in ${(timeMs / 1000).toFixed(1)}s`); return { filesIndexed, chunksCreated, timeMs }; } async function getFilesToIndex(projectPath: string, patterns: string[]): Promise<string[]> { // Read .gitignore let ig = ignore(); const gitignorePath = join(projectPath, '.gitignore'); if (existsSync(gitignorePath)) { try { const gitignoreContent = await readFile(gitignorePath, 'utf-8'); ig = ignore().add(gitignoreContent); } catch (error) { logger.warn('Could not read .gitignore:', error); } } // Always ignore .ultra-mcp directory ig.add('.ultra-mcp'); // Find files matching patterns const files = await fg(patterns, { cwd: projectPath, absolute: true, dot: false, followSymbolicLinks: false, ignore: [ '**/node_modules/**', '**/.git/**', '**/.ultra-mcp/**', '**/dist/**', '**/build/**', '**/.next/**', '**/.nuxt/**', 
'**/coverage/**', ], }); // Filter by gitignore return files.filter(file => { const relPath = relative(projectPath, file); return !ig.ignores(relPath); }); } async function updateGitignore(projectPath: string): Promise<void> { const gitignorePath = join(projectPath, '.gitignore'); try { let content = ''; if (existsSync(gitignorePath)) { content = await readFile(gitignorePath, 'utf-8'); } if (!content.includes('.ultra-mcp')) { const addition = content.endsWith('\n') ? '' : '\n'; await appendFile(gitignorePath, `${addition}\n# Ultra MCP vector index\n.ultra-mcp/\n`); logger.log('Added .ultra-mcp to .gitignore'); } } catch (error) { logger.warn('Could not update .gitignore:', error); } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/RealMikeChong/ultra-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.