Skip to main content
Glama

RAG Documentation MCP Server

import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { DocumentChunk, McpToolResponse, RepositoryConfig, IndexingStatus } from '../types.js';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import crypto from 'crypto';
import { glob } from 'glob';
import { fileTypeFromFile } from 'file-type';
import { detectLanguage } from '../utils/language-detection.js';
import { RepositoryConfigLoader } from '../utils/repository-config-loader.js';
import { IndexingStatusManager } from '../utils/indexing-status-manager.js';

const COLLECTION_NAME = 'documentation';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const REPO_CONFIG_DIR = path.join(__dirname, '..', 'repo-configs');
const DEFAULT_CHUNK_SIZE = 1000;

/**
 * Tool handler that registers a local directory as a repository, splits its
 * files into text chunks, and indexes those chunks (embedding + Qdrant upsert)
 * in a background task so the tool call itself returns quickly.
 */
export class LocalRepositoryHandler extends BaseHandler {
  /** Progress token of the most recent `handle` call; used only as a fallback default. */
  private activeProgressToken: string | number | undefined;
  private statusManager: IndexingStatusManager;

  // Track active indexing processes (keyed by repository name)
  private static activeIndexingProcesses: Map<string, boolean> = new Map();

  // Smaller batch size to reduce processing time per batch
  private static BATCH_SIZE = 50;

  // Emit a progress update after this many files have been visited
  private static readonly PROGRESS_REPORT_INTERVAL = 50;

  constructor(server: any, apiClient: any) {
    super(server, apiClient);
    this.statusManager = new IndexingStatusManager();
  }

  /**
   * Validates the request, persists the repository configuration and starts
   * background indexing.
   *
   * @param args Tool arguments. `path` is required; `name`, `include`,
   *   `exclude`, `watchMode`, `watchInterval`, `chunkSize` and `fileTypeConfig`
   *   are optional overrides.
   * @param callContext Optional call context; `progressToken` (or, failing
   *   that, `requestId`) is used to address progress notifications.
   * @returns A text response describing either the started indexing run, an
   *   already running one, or a non-protocol failure (`isError: true`).
   * @throws McpError with `InvalidParams` when the path or name is invalid.
   */
  async handle(
    args: any,
    callContext?: { progressToken?: string | number, requestId: string | number }
  ): Promise<McpToolResponse> {
    // `??` rather than `||`: 0 is a legitimate token / request id.
    const progressToken = callContext?.progressToken ?? callContext?.requestId;
    this.activeProgressToken = progressToken;

    // Validate required parameters
    if (!args?.path || typeof args.path !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Repository path is required');
    }

    // Normalize the repository path
    const repoPath = path.resolve(args.path);

    // Check if the repository path exists
    try {
      const stats = await fs.stat(repoPath);
      if (!stats.isDirectory()) {
        throw new McpError(ErrorCode.InvalidParams, `Path is not a directory: ${repoPath}`);
      }
    } catch (error) {
      // Keep the more specific "not a directory" error instead of masking it.
      if (error instanceof McpError) {
        throw error;
      }
      throw new McpError(ErrorCode.InvalidParams, `Invalid repository path: ${repoPath}`);
    }

    // The name becomes part of a file name, so reject anything path-like early.
    const repositoryName = args.name || path.basename(repoPath);
    this.assertSafeRepositoryName(repositoryName);

    // Create repository configuration
    const config = this.buildRepositoryConfig(args, repoPath, repositoryName);

    let reserved = false;
    let handedOff = false;

    try {
      // Check if indexing is already in progress for this repository
      if (LocalRepositoryHandler.activeIndexingProcesses.has(config.name)) {
        const status = await this.statusManager.getStatus(config.name);
        let text = `Repository indexing already in progress for ${config.name}.\n`;

        if (status && status.status === 'processing') {
          text +=
            `Current progress: ${status.percentageComplete || 0}%\n` +
            `Files processed: ${status.processedFiles || 0} of ${status.totalFiles || 'unknown'}\n` +
            `Chunks indexed: ${status.indexedChunks || 0} of ${status.totalChunks || 'unknown'}\n` +
            `Started at: ${new Date(status.startTime).toLocaleString()}`;
        } else {
          // The run is registered but has not reported 'processing' yet.
          text += 'Indexing is starting; detailed progress is not available yet.';
        }

        return { content: [{ type: 'text', text }] };
      }

      // Reserve the name before the first await below so that a concurrent
      // call for the same repository cannot start a second run.
      LocalRepositoryHandler.activeIndexingProcesses.set(config.name, true);
      reserved = true;

      // Save the repository configuration
      await this.saveRepositoryConfig(config);

      // Update the repositories.json configuration file
      const configLoader = new RepositoryConfigLoader(this.server, this.apiClient);
      await configLoader.addRepositoryToConfig(config);

      console.info(`[${config.name}] Repository configuration saved and loaded.`);
      this.emitProgress(progressToken, { message: "Repository configuration saved." });

      // Create initial status
      await this.statusManager.createStatus(config.name);

      // Start the indexing process asynchronously. It is intentionally not
      // awaited; the catch keeps an unexpected rejection from going unhandled.
      void this.processRepositoryAsync(config, progressToken).catch((error: unknown) => {
        console.error(`[${config.name}] Unhandled error in background indexing:`, error);
      });
      handedOff = true;

      return {
        content: [
          {
            type: 'text',
            text:
              `Repository configuration saved for ${config.name} (${repoPath}).\n` +
              `Indexing has started in the background and will continue after this response.\n` +
              `You can check the status using the 'get_indexing_status' tool with parameter name="${config.name}".\n` +
              `Watch mode: ${config.watchMode ? 'enabled' : 'disabled'}`
          },
        ],
      };
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to index repository: ${error}`,
          },
        ],
        isError: true,
      };
    } finally {
      // If setup failed before the background task took ownership, release
      // the reservation; otherwise the task's own `finally` releases it.
      if (reserved && !handedOff) {
        LocalRepositoryHandler.activeIndexingProcesses.delete(config.name);
      }
    }
  }

  /**
   * Builds the repository configuration from the tool arguments, filling in
   * defaults for everything the caller did not supply.
   */
  private buildRepositoryConfig(args: any, repoPath: string, name: string): RepositoryConfig {
    const config: RepositoryConfig = {
      path: repoPath,
      name,
      include: args.include || ['**/*'],
      exclude: args.exclude || [
        '**/node_modules/**',
        '**/.git/**',
        '**/build/**',
        '**/dist/**',
        '**/*.min.js',
        '**/*.map',
        '**/package-lock.json',
        '**/yarn.lock'
      ],
      watchMode: args.watchMode || false,
      watchInterval: args.watchInterval || 60000, // Default: 1 minute
      chunkSize: this.resolveChunkSize(args.chunkSize),
      fileTypeConfig: args.fileTypeConfig || {
        // Default file type configurations
        '.js': { include: true, chunkStrategy: 'semantic' },
        '.ts': { include: true, chunkStrategy: 'semantic' },
        '.jsx': { include: true, chunkStrategy: 'semantic' },
        '.tsx': { include: true, chunkStrategy: 'semantic' },
        '.py': { include: true, chunkStrategy: 'semantic' },
        '.java': { include: true, chunkStrategy: 'semantic' },
        '.md': { include: true, chunkStrategy: 'semantic' },
        '.txt': { include: true, chunkStrategy: 'line' },
        '.json': { include: true, chunkStrategy: 'semantic' },
        '.html': { include: true, chunkStrategy: 'semantic' },
        '.css': { include: true, chunkStrategy: 'semantic' },
        '.scss': { include: true, chunkStrategy: 'semantic' },
        '.xml': { include: true, chunkStrategy: 'semantic' },
        '.yaml': { include: true, chunkStrategy: 'semantic' },
        '.yml': { include: true, chunkStrategy: 'semantic' },
      }
    };
    return config;
  }

  /**
   * Rejects repository names that are empty or could escape the config
   * directory when used as a file name.
   *
   * @throws McpError with `InvalidParams` for unsafe names.
   */
  private assertSafeRepositoryName(name: unknown): void {
    const isSafe =
      typeof name === 'string' &&
      name.trim().length > 0 &&
      name !== '.' &&
      name !== '..' &&
      !/[\\\/\0]/.test(name);

    if (!isSafe) {
      throw new McpError(
        ErrorCode.InvalidParams,
        `Invalid repository name: "${String(name)}". Names must be non-empty and must not contain path separators.`
      );
    }
  }

  /**
   * Coerces a requested chunk size to a positive integer, falling back to
   * `DEFAULT_CHUNK_SIZE` for missing, non-numeric or non-positive values.
   */
  private resolveChunkSize(requested: unknown): number {
    const size = Number(requested);
    return Number.isFinite(size) && size >= 1 ? Math.floor(size) : DEFAULT_CHUNK_SIZE;
  }

  /**
   * Sends a best-effort progress notification. Does nothing without a token,
   * and never lets a notification failure interrupt indexing.
   */
  private emitProgress(
    progressToken: string | number | undefined,
    payload: { message: string; percentageComplete?: number }
  ): void {
    if (progressToken == null) {
      return;
    }
    const warn = (error: unknown): void => {
      console.warn(`Failed to send progress notification: ${error instanceof Error ? error.message : String(error)}`);
    };
    try {
      const pending: unknown = (this.server as any).sendProgress(progressToken, payload);
      if (pending instanceof Promise) {
        pending.catch(warn);
      }
    } catch (error) {
      warn(error);
    }
  }

  /**
   * Reads every file matched by the include/exclude patterns and converts it
   * into document chunks.
   *
   * Files are skipped (and counted in `skippedFiles`) when their file type
   * config sets `include: false`, when they are empty, when they contain NUL
   * characters (treated as binary), or when reading/chunking throws.
   *
   * @param config Repository configuration to process.
   * @param progressToken Token for progress notifications; defaults to the
   *   token of the most recent `handle` call.
   */
  private async processRepository(
    config: RepositoryConfig,
    progressToken: string | number | undefined = this.activeProgressToken
  ): Promise<{ chunks: DocumentChunk[], processedFiles: number, skippedFiles: number }> {
    const chunks: DocumentChunk[] = [];
    let processedFiles = 0;
    let skippedFiles = 0;
    let fileCounter = 0;

    // Get all files matching the include/exclude patterns
    const files = await glob(config.include, {
      cwd: config.path,
      ignore: config.exclude,
      absolute: true,
      nodir: true,
    });

    const totalFiles = files.length;
    console.info(`[${config.name}] Found ${totalFiles} files to process based on include/exclude patterns.`);
    this.emitProgress(progressToken, { message: `Found ${totalFiles} files to process.` });

    for (const file of files) {
      fileCounter++;
      try {
        const relativePath = path.relative(config.path, file);
        const extension = path.extname(file);
        // Exact match first, then a lower-cased lookup so 'README.MD' uses the '.md' entry.
        const fileTypeConfig =
          config.fileTypeConfig[extension] ?? config.fileTypeConfig[extension.toLowerCase()];

        // Skip files that should be excluded based on file type config
        if (fileTypeConfig && fileTypeConfig.include === false) {
          skippedFiles++;
          continue;
        }

        // Read file content
        const content = await fs.readFile(file, 'utf-8');

        // Skip empty files
        if (!content.trim()) {
          skippedFiles++;
          continue;
        }

        // Text files do not contain NUL characters; treat such content as
        // binary so that decoded garbage is not embedded and indexed.
        if (content.includes('\u0000')) {
          skippedFiles++;
          continue;
        }

        // Detect language for better processing
        const language = detectLanguage(file, content);

        // Process the file content into chunks
        const fileChunks = this.chunkFileContent(
          content,
          file,
          relativePath,
          config,
          language,
          fileTypeConfig?.chunkStrategy || 'line'
        );

        // Plain loop instead of push(...fileChunks): spreading a very large
        // array can exceed the engine's argument limit.
        for (const fileChunk of fileChunks) {
          chunks.push(fileChunk);
        }
        processedFiles++;
      } catch (error) {
        console.error(`[${config.name}] Error processing file ${file}: ${error instanceof Error ? error.message : String(error)}`);
        skippedFiles++;
      } finally {
        // In `finally` so skipped and failed files also advance the report.
        if (fileCounter % LocalRepositoryHandler.PROGRESS_REPORT_INTERVAL === 0) {
          const percentageComplete = Math.round((fileCounter / totalFiles) * 33); // File processing is ~1/3 of the job
          this.emitProgress(progressToken, {
            message: `Processed ${fileCounter} of ${totalFiles} files...`,
            percentageComplete
          });
          console.info(`[${config.name}] Processed ${fileCounter} of ${totalFiles} files... (${processedFiles} successful, ${skippedFiles} skipped/errored)`);
        }
      }
    }

    console.info(`[${config.name}] Completed file iteration. Processed: ${processedFiles}, Skipped/Errored: ${skippedFiles}.`);
    return { chunks, processedFiles, skippedFiles };
  }

  /**
   * Splits one file's content into chunks using the given strategy and wraps
   * each chunk with its metadata (URL, title, timestamp, position).
   *
   * @param chunkStrategy `'semantic'` (paragraph based), `'line'`, or anything
   *   else for word-based chunking.
   */
  private chunkFileContent(
    content: string,
    filePath: string,
    relativePath: string,
    config: RepositoryConfig,
    language: string,
    chunkStrategy: string
  ): DocumentChunk[] {
    const chunks: DocumentChunk[] = [];
    const timestamp = new Date().toISOString();
    const fileUrl = `file://${filePath}`;
    const title = `${config.name}/${relativePath}`;

    // Different chunking strategies based on file type
    let textChunks: string[] = [];

    switch (chunkStrategy) {
      case 'semantic':
        // For semantic chunking, we'd ideally use a more sophisticated approach
        // For now, we'll use a simple paragraph-based approach
        textChunks = this.chunkByParagraphs(content, config.chunkSize);
        break;
      case 'line':
        // Chunk by lines, respecting max chunk size
        textChunks = this.chunkByLines(content, config.chunkSize);
        break;
      default:
        // Default to simple text chunking
        textChunks = this.chunkText(content, config.chunkSize);
    }

    // Whitespace-only chunks carry no content worth embedding.
    textChunks = textChunks.filter((text) => text.trim().length > 0);

    // Create document chunks with metadata
    chunks.push(...textChunks.map((text, index) => ({
      text,
      url: fileUrl,
      title,
      timestamp,
      filePath: relativePath,
      language,
      chunkIndex: index,
      totalChunks: textChunks.length,
    })));

    return chunks;
  }

  /**
   * Cuts `text` into consecutive pieces of at most `maxChunkSize` UTF-16 code
   * units. Used for single lines/words that are longer than a whole chunk.
   * `maxChunkSize` must be >= 1.
   */
  private splitOversized(text: string, maxChunkSize: number): string[] {
    const pieces: string[] = [];
    let start = 0;

    while (start < text.length) {
      let end = Math.min(start + maxChunkSize, text.length);

      // Avoid cutting between the two halves of a surrogate pair.
      if (end < text.length && end - start > 1) {
        const lastCode = text.charCodeAt(end - 1);
        if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
          end -= 1;
        }
      }

      pieces.push(text.slice(start, end));
      start = end;
    }

    return pieces;
  }

  /**
   * Word-based chunking: words are appended until the space-joined chunk
   * reaches `maxChunkSize`, then the chunk is closed. A single word longer
   * than `maxChunkSize` is split into fixed-size pieces.
   */
  private chunkText(text: string, maxChunkSize: number): string[] {
    const limit = this.resolveChunkSize(maxChunkSize);
    const chunks: string[] = [];
    let currentWords: string[] = [];
    let currentLength = 0; // length of currentWords.join(' '), tracked incrementally

    const flush = (): void => {
      if (currentWords.length > 0) {
        chunks.push(currentWords.join(' '));
        currentWords = [];
        currentLength = 0;
      }
    };

    for (const word of text.split(/\s+/)) {
      // Leading/trailing whitespace yields empty strings from split().
      if (word.length === 0) {
        continue;
      }

      if (word.length > limit) {
        flush();
        for (const piece of this.splitOversized(word, limit)) {
          chunks.push(piece);
        }
        continue;
      }

      currentLength += word.length + (currentWords.length > 0 ? 1 : 0);
      currentWords.push(word);

      if (currentLength >= limit) {
        flush();
      }
    }

    flush();
    return chunks;
  }

  /**
   * Line-based chunking: whole lines are grouped so that a chunk stays within
   * `maxChunkSize` (counting one newline per line). A single line longer than
   * `maxChunkSize` is split into fixed-size pieces.
   */
  private chunkByLines(text: string, maxChunkSize: number): string[] {
    const limit = this.resolveChunkSize(maxChunkSize);
    const chunks: string[] = [];
    let currentLines: string[] = [];
    let currentLength = 0;

    const flush = (): void => {
      if (currentLines.length > 0) {
        chunks.push(currentLines.join('\n'));
        currentLines = [];
        currentLength = 0;
      }
    };

    for (const line of text.split(/\r?\n/)) {
      if (line.length > limit) {
        flush();
        for (const piece of this.splitOversized(line, limit)) {
          chunks.push(piece);
        }
        continue;
      }

      const lineLength = line.length + 1; // +1 for the newline
      if (currentLength + lineLength > limit) {
        flush();
      }

      currentLines.push(line);
      currentLength += lineLength;
    }

    flush();
    return chunks;
  }

  /**
   * Paragraph-based chunking: paragraphs (separated by a blank line) are
   * grouped so that a chunk stays within `maxChunkSize`. A paragraph that is
   * itself longer than `maxChunkSize` is chunked by lines instead.
   */
  private chunkByParagraphs(text: string, maxChunkSize: number): string[] {
    const limit = this.resolveChunkSize(maxChunkSize);
    const chunks: string[] = [];
    let currentParagraphs: string[] = [];
    let currentLength = 0;

    const flush = (): void => {
      if (currentParagraphs.length > 0) {
        chunks.push(currentParagraphs.join('\n\n'));
        currentParagraphs = [];
        currentLength = 0;
      }
    };

    // Split by double newlines (paragraphs)
    for (const paragraph of text.split(/\r?\n\r?\n/)) {
      if (paragraph.length > limit) {
        // e.g. a source or JSON file without blank lines is one huge "paragraph"
        flush();
        for (const piece of this.chunkByLines(paragraph, limit)) {
          chunks.push(piece);
        }
        continue;
      }

      const paragraphLength = paragraph.length + 2; // +2 for the double newline
      if (currentLength + paragraphLength > limit) {
        flush();
      }

      currentParagraphs.push(paragraph);
      currentLength += paragraphLength;
    }

    flush();
    return chunks;
  }

  /**
   * Writes the configuration to `<REPO_CONFIG_DIR>/<name>.json`, creating the
   * directory if needed.
   *
   * @throws McpError `InvalidParams` for an unsafe name, `InternalError` when
   *   the directory cannot be created.
   */
  private async saveRepositoryConfig(config: RepositoryConfig): Promise<void> {
    // Guard at the write site as well: the name is used verbatim in the path.
    this.assertSafeRepositoryName(config.name);

    // Ensure the config directory exists
    try {
      await fs.mkdir(REPO_CONFIG_DIR, { recursive: true });
    } catch (error) {
      console.error('Error creating repository config directory:', error);
      throw new McpError(ErrorCode.InternalError, 'Failed to create repository config directory');
    }

    // Save the config file
    const configPath = path.join(REPO_CONFIG_DIR, `${config.name}.json`);
    await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8');
  }

  /** Returns a random 128-bit identifier as 32 hex characters. */
  private generatePointId(): string {
    return crypto.randomBytes(16).toString('hex');
  }

  /**
   * Process repository asynchronously to avoid MCP timeout.
   *
   * Chunks the repository files, then embeds and upserts the chunks batch by
   * batch, recording progress through the status manager. Failures of single
   * embeddings or batches are logged and skipped. The repository is removed
   * from the active set when the run ends, whatever the outcome.
   *
   * @param config Repository configuration to index.
   * @param progressToken Token used for progress notifications while files
   *   are being read.
   */
  private async processRepositoryAsync(config: RepositoryConfig, progressToken?: string | number): Promise<void> {
    try {
      // Mark this repository as being processed
      LocalRepositoryHandler.activeIndexingProcesses.set(config.name, true);

      // Update status to processing
      await this.statusManager.updateStatus({
        repositoryName: config.name,
        status: 'processing'
      });

      console.info(`[${config.name}] Starting to process repository files asynchronously...`);

      // Process the repository files
      const { chunks, processedFiles, skippedFiles } = await this.processRepository(config, progressToken);

      // Update status with file processing results
      await this.statusManager.updateStatus({
        repositoryName: config.name,
        totalFiles: processedFiles + skippedFiles,
        processedFiles,
        skippedFiles,
        totalChunks: chunks.length,
        percentageComplete: 33
      });

      console.info(`[${config.name}] Finished processing repository files. Found ${chunks.length} chunks from ${processedFiles} files (${skippedFiles} skipped).`);

      // Batch process chunks with smaller batch size for better responsiveness
      const batchSize = LocalRepositoryHandler.BATCH_SIZE;
      let indexedChunks = 0;
      const totalChunks = chunks.length;
      const totalBatches = Math.ceil(totalChunks / batchSize);

      console.info(`[${config.name}] Starting to generate embeddings and index ${totalChunks} chunks in ${totalBatches} batches...`);

      for (let i = 0; i < totalChunks; i += batchSize) {
        const batchChunks = chunks.slice(i, i + batchSize);
        const currentBatch = Math.floor(i / batchSize) + 1;

        // Update status before processing batch
        await this.statusManager.updateStatus({
          repositoryName: config.name,
          currentBatch,
          totalBatches,
          indexedChunks,
          percentageComplete: 33 + Math.round((i / totalChunks) * 66)
        });

        console.info(`[${config.name}] Processing batch ${currentBatch} of ${totalBatches}...`);

        try {
          const embeddingResults = await Promise.allSettled(
            batchChunks.map(async (chunk) => {
              try {
                const embedding = await this.apiClient.getEmbeddings(chunk.text);
                return {
                  id: this.generatePointId(),
                  vector: embedding,
                  payload: {
                    ...chunk,
                    _type: 'DocumentChunk' as const,
                    repository: config.name,
                    isRepositoryFile: true,
                  } as Record<string, unknown>,
                };
              } catch (embeddingError) {
                console.error(`[${config.name}] Failed to generate embedding for chunk from ${chunk.filePath || chunk.url}: ${embeddingError instanceof Error ? embeddingError.message : String(embeddingError)}`);
                throw embeddingError; // Re-throw to be caught by Promise.allSettled
              }
            })
          );

          // Separate the embedded points from the failures.
          const successfulPoints = [];
          let failedEmbeddingsCount = 0;
          for (const result of embeddingResults) {
            if (result.status === 'fulfilled') {
              successfulPoints.push(result.value);
            } else {
              failedEmbeddingsCount++;
            }
          }

          if (failedEmbeddingsCount > 0) {
            console.warn(`[${config.name}] Failed to generate embeddings for ${failedEmbeddingsCount} of ${batchChunks.length} chunks in batch ${currentBatch}.`);
          }

          // Only count points as indexed once the upsert has actually succeeded.
          let indexedInBatch = 0;
          if (successfulPoints.length > 0) {
            try {
              await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, {
                wait: true,
                points: successfulPoints,
              });
              indexedInBatch = successfulPoints.length;
              indexedChunks += indexedInBatch;
            } catch (upsertError) {
              console.error(`[${config.name}] Failed to upsert batch ${currentBatch} of ${successfulPoints.length} points to Qdrant: ${upsertError instanceof Error ? upsertError.message : String(upsertError)}`);
            }
          }

          const percentageComplete = 33 + Math.round(((i + batchChunks.length) / totalChunks) * 66);
          console.info(`[${config.name}] Processed batch ${currentBatch} of ${totalBatches}. Successfully indexed in this batch: ${indexedInBatch}. Total indexed so far: ${indexedChunks} chunks.`);

          // Update status after processing batch
          await this.statusManager.updateStatus({
            repositoryName: config.name,
            currentBatch,
            totalBatches,
            indexedChunks,
            percentageComplete
          });
        } catch (batchError) {
          console.error(`[${config.name}] Error processing batch ${currentBatch}:`, batchError);
          // Continue with next batch despite errors
        }
      }

      // Mark indexing as completed
      console.info(`[${config.name}] Finished generating embeddings and indexing. Total indexed: ${indexedChunks} of ${totalChunks} chunks.`);
      await this.statusManager.completeStatus(config.name, true, {
        processedFiles,
        skippedFiles,
        totalChunks,
        indexedChunks
      });

      // If watch mode is enabled, start the watcher
      if (config.watchMode) {
        // This would be implemented in a separate class
        // this.startRepositoryWatcher(config);
      }
    } catch (error) {
      console.error(`[${config.name}] Error during async repository processing:`, error);

      // Update status to failed; a failure here must not escape the
      // background task as an unhandled rejection.
      try {
        await this.statusManager.completeStatus(
          config.name,
          false,
          undefined,
          error instanceof Error ? error.message : String(error)
        );
      } catch (statusError) {
        console.error(`[${config.name}] Failed to record failed indexing status:`, statusError);
      }
    } finally {
      // Remove from active processes
      LocalRepositoryHandler.activeIndexingProcesses.delete(config.name);
    }
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/rahulretnan/mcp-ragdocs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.