RAG Documentation MCP Server
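
The listing below is the server's repository-update tool handler. It validates and saves an updated repository configuration, re-chunks the repository's files, deletes the repository's existing points from the 'documentation' Qdrant collection, and then re-indexes fresh embeddings in batches of 100, reporting progress throughout.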

import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { DocumentChunk, McpToolResponse, RepositoryConfig } from '../types.js';
import fs from 'fs/promises';
import path from 'path';
import { glob } from 'glob';
import crypto from 'crypto';
import { detectLanguage } from '../utils/language-detection.js';
import { RepositoryConfigLoader } from '../utils/repository-config-loader.js';

const COLLECTION_NAME = 'documentation';
const REPO_CONFIG_DIR = path.join(process.cwd(), 'repo-configs');

export class UpdateRepositoryHandler extends BaseHandler {
  private activeProgressToken: string | number | undefined;

  async handle(args: any, callContext?: { progressToken?: string | number, requestId: string | number }): Promise<McpToolResponse> {
    this.activeProgressToken = callContext?.progressToken || callContext?.requestId;

    if (!args.name || typeof args.name !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Repository name is required');
    }

    const repoName = args.name;
    const configPath = path.join(REPO_CONFIG_DIR, `${repoName}.json`);

    try {
      // Check if the repository config exists
      try {
        await fs.access(configPath);
      } catch {
        throw new McpError(ErrorCode.InvalidParams, `Repository not found: ${repoName}`);
      }

      // Read the config
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent) as RepositoryConfig;

      // Update config with any provided parameters
      if (args.include) config.include = args.include;
      if (args.exclude) config.exclude = args.exclude;
      if (args.watchMode !== undefined) config.watchMode = args.watchMode;
      if (args.watchInterval) config.watchInterval = args.watchInterval;
      if (args.chunkSize) config.chunkSize = args.chunkSize;
      if (args.fileTypeConfig) config.fileTypeConfig = { ...config.fileTypeConfig, ...args.fileTypeConfig };

      // Check if the repository path exists
      try {
        const stats = await fs.stat(config.path);
        if (!stats.isDirectory()) {
          throw new McpError(ErrorCode.InvalidParams, `Path is not a directory: ${config.path}`);
        }
      } catch (error) {
        // Preserve the more specific "not a directory" error; otherwise
        // report the path as invalid (missing or unreadable).
        if (error instanceof McpError) {
          throw error;
        }
        throw new McpError(ErrorCode.InvalidParams, `Invalid repository path: ${config.path}`);
      }

      // Save the updated config
      await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8');

      // Update the repositories.json configuration file
      const configLoader = new RepositoryConfigLoader(this.server, this.apiClient);
      await configLoader.addRepositoryToConfig(config);

      console.info(`[${config.name}] Repository configuration updated and saved.`);
      if (this.activeProgressToken) {
        (this.server as any).sendProgress(this.activeProgressToken, { message: "Repository configuration updated." });
      }

      // Process the repository
      console.info(`[${config.name}] Starting to re-process repository files...`);
      if (this.activeProgressToken) {
        (this.server as any).sendProgress(this.activeProgressToken, { message: "Starting to re-process repository files..." });
      }
      const { chunks, processedFiles, skippedFiles } = await this.processRepository(config);
      console.info(`[${config.name}] Finished re-processing repository files. Found ${chunks.length} chunks from ${processedFiles} processed files (${skippedFiles} skipped).`);
      if (this.activeProgressToken) {
        (this.server as any).sendProgress(this.activeProgressToken, {
          message: `Finished re-processing files. Found ${chunks.length} chunks.`,
          percentageComplete: 25, // 25% for file processing
        });
      }

      // Remove existing repository documents from the vector database
      console.info(`[${config.name}] Removing existing documents from vector database...`);
      if (this.activeProgressToken) {
        (this.server as any).sendProgress(this.activeProgressToken, {
          message: "Removing existing documents...",
          percentageComplete: 50, // 50% after deletion
        });
      }
      await this.apiClient.qdrantClient.delete(COLLECTION_NAME, {
        filter: {
          must: [
            { key: 'repository', match: { value: repoName } },
            { key: 'isRepositoryFile', match: { value: true } },
          ],
        },
        wait: true,
      });

      // Batch process chunks for better performance
      const batchSize = 100;
      let indexedChunks = 0;
      const totalChunks = chunks.length;

      console.info(`[${config.name}] Starting to generate embeddings and re-index ${totalChunks} chunks...`);
      if (this.activeProgressToken) {
        (this.server as any).sendProgress(this.activeProgressToken, {
          message: `Starting to generate embeddings for ${totalChunks} chunks...`,
          percentageComplete: 50,
        });
      }

      for (let i = 0; i < totalChunks; i += batchSize) {
        const batchChunks = chunks.slice(i, i + batchSize);
        const embeddingResults = await Promise.allSettled(
          batchChunks.map(async (chunk) => {
            try {
              const embedding = await this.apiClient.getEmbeddings(chunk.text);
              return {
                id: this.generatePointId(),
                vector: embedding,
                payload: {
                  ...chunk,
                  _type: 'DocumentChunk' as const,
                  repository: config.name,
                  isRepositoryFile: true,
                } as Record<string, unknown>,
              };
            } catch (embeddingError) {
              console.error(`[${config.name}] Failed to generate embedding for chunk from ${chunk.filePath || chunk.url} during update: ${embeddingError instanceof Error ? embeddingError.message : String(embeddingError)}`);
              throw embeddingError; // Re-throw to be caught by Promise.allSettled
            }
          })
        );

        const successfulPoints = embeddingResults
          .filter(result => result.status === 'fulfilled')
          .map(result => (result as PromiseFulfilledResult<any>).value);
        const failedEmbeddingsCount = embeddingResults.filter(result => result.status === 'rejected').length;

        if (failedEmbeddingsCount > 0) {
          console.warn(`[${config.name}] Failed to generate embeddings for ${failedEmbeddingsCount} of ${batchChunks.length} chunks in this batch during update.`);
        }

        if (successfulPoints.length > 0) {
          try {
            await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, {
              wait: true,
              points: successfulPoints,
            });
            indexedChunks += successfulPoints.length;
          } catch (upsertError) {
            console.error(`[${config.name}] Failed to upsert batch of ${successfulPoints.length} points to Qdrant during update: ${upsertError instanceof Error ? upsertError.message : String(upsertError)}`);
          }
        }

        const percentageComplete = 50 + Math.round(((i + batchChunks.length) / totalChunks) * 50); // Remaining 50% for indexing
        console.info(`[${config.name}] Re-processed batch ${Math.floor(i / batchSize) + 1} of ${Math.ceil(totalChunks / batchSize)}. Successfully re-indexed in this batch: ${successfulPoints.length}. Total re-indexed so far: ${indexedChunks} chunks.`);
        if (this.activeProgressToken) {
          (this.server as any).sendProgress(this.activeProgressToken, {
            message: `Re-processed ${i + batchChunks.length} of ${totalChunks} chunks for embedding/indexing. Successfully re-indexed: ${indexedChunks}.`,
            percentageComplete,
          });
        }
      }

      console.info(`[${config.name}] Finished generating embeddings and re-indexing. Total indexed: ${indexedChunks} chunks.`);
      if (this.activeProgressToken) {
        (this.server as any).sendProgress(this.activeProgressToken, {
          message: `Finished re-indexing ${indexedChunks} chunks.`,
          percentageComplete: 100,
        });
      }

      return {
        content: [
          {
            type: 'text',
            text: `Successfully updated repository: ${config.name} (${config.path})\n` +
              `Processed ${processedFiles} files, skipped ${skippedFiles} files\n` +
              `Created ${chunks.length} chunks, indexed ${indexedChunks} chunks\n` +
              `Watch mode: ${config.watchMode ? 'enabled' : 'disabled'}`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to update repository: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }

  private async processRepository(config: RepositoryConfig): Promise<{ chunks: DocumentChunk[], processedFiles: number, skippedFiles: number }> {
    const chunks: DocumentChunk[] = [];
    let processedFiles = 0;
    let skippedFiles = 0;
    let fileCounter = 0;

    // Get all files matching the include/exclude patterns
    const files = await glob(config.include, {
      cwd: config.path,
      ignore: config.exclude,
      absolute: true,
      nodir: true,
    });
    const totalFiles = files.length;

    console.info(`[${config.name}] Found ${totalFiles} files to re-process based on include/exclude patterns.`);
    if (this.activeProgressToken) {
      (this.server as any).sendProgress(this.activeProgressToken, { message: `Found ${totalFiles} files to re-process.` });
    }

    for (const file of files) {
      fileCounter++;
      try {
        const relativePath = path.relative(config.path, file);
        const extension = path.extname(file);
        const fileTypeConfig = config.fileTypeConfig[extension];

        // Skip files that should be excluded based on file type config
        if (fileTypeConfig && fileTypeConfig.include === false) {
          skippedFiles++;
          continue;
        }

        // Read file content
        const content = await fs.readFile(file, 'utf-8');

        // Skip empty files
        if (!content.trim()) {
          skippedFiles++;
          continue;
        }

        // Detect language for better processing
        const language = detectLanguage(file, content);

        // Process the file content into chunks
        const fileChunks = this.chunkFileContent(
          content,
          file,
          relativePath,
          config,
          language,
          fileTypeConfig?.chunkStrategy || 'line'
        );
        chunks.push(...fileChunks);
        processedFiles++;

        if (fileCounter % 50 === 0 && fileCounter > 0 && this.activeProgressToken) {
          const percentageComplete = Math.round((fileCounter / totalFiles) * 25); // File processing is ~1/4 of the job here
          (this.server as any).sendProgress(this.activeProgressToken, {
            message: `Re-processed ${fileCounter} of ${totalFiles} files...`,
            percentageComplete,
          });
          console.info(`[${config.name}] Re-processed ${fileCounter} of ${totalFiles} files... (${processedFiles} successful, ${skippedFiles} skipped/errored)`);
        }
      } catch (error) {
        console.error(`[${config.name}] Error processing file ${file}:`, error);
        skippedFiles++;
      }
    }

    console.info(`[${config.name}] Completed file re-iteration. Processed: ${processedFiles}, Skipped/Errored: ${skippedFiles}.`);
    return { chunks, processedFiles, skippedFiles };
  }

  private chunkFileContent(
    content: string,
    filePath: string,
    relativePath: string,
    config: RepositoryConfig,
    language: string,
    chunkStrategy: string
  ): DocumentChunk[] {
    const chunks: DocumentChunk[] = [];
    const timestamp = new Date().toISOString();
    const fileUrl = `file://${filePath}`;
    const title = `${config.name}/${relativePath}`;

    // Different chunking strategies based on file type
    let textChunks: string[] = [];
    switch (chunkStrategy) {
      case 'semantic':
        // For semantic chunking, we'd ideally use a more sophisticated approach.
        // For now, we'll use a simple paragraph-based approach.
        textChunks = this.chunkByParagraphs(content, config.chunkSize);
        break;
      case 'line':
        // Chunk by lines, respecting max chunk size
        textChunks = this.chunkByLines(content, config.chunkSize);
        break;
      default:
        // Default to simple text chunking
        textChunks = this.chunkText(content, config.chunkSize);
    }

    // Create document chunks with metadata
    chunks.push(...textChunks.map((text, index) => ({
      text,
      url: fileUrl,
      title,
      timestamp,
      filePath: relativePath,
      language,
      chunkIndex: index,
      totalChunks: textChunks.length,
    })));

    return chunks;
  }

  private chunkText(text: string, maxChunkSize: number): string[] {
    const words = text.split(/\s+/);
    const chunks: string[] = [];
    let currentChunk: string[] = [];

    for (const word of words) {
      currentChunk.push(word);
      const currentLength = currentChunk.join(' ').length;
      if (currentLength >= maxChunkSize) {
        chunks.push(currentChunk.join(' '));
        currentChunk = [];
      }
    }

    if (currentChunk.length > 0) {
      chunks.push(currentChunk.join(' '));
    }

    return chunks;
  }

  private chunkByLines(text: string, maxChunkSize: number): string[] {
    const lines = text.split(/\r?\n/);
    const chunks: string[] = [];
    let currentChunk: string[] = [];
    let currentLength = 0;

    for (const line of lines) {
      const lineLength = line.length + 1; // +1 for the newline
      if (currentLength + lineLength > maxChunkSize && currentChunk.length > 0) {
        chunks.push(currentChunk.join('\n'));
        currentChunk = [];
        currentLength = 0;
      }
      currentChunk.push(line);
      currentLength += lineLength;
    }

    if (currentChunk.length > 0) {
      chunks.push(currentChunk.join('\n'));
    }

    return chunks;
  }

  private chunkByParagraphs(text: string, maxChunkSize: number): string[] {
    // Split by double newlines (paragraphs)
    const paragraphs = text.split(/\r?\n\r?\n/);
    const chunks: string[] = [];
    let currentChunk: string[] = [];
    let currentLength = 0;

    for (const paragraph of paragraphs) {
      const paragraphLength = paragraph.length + 2; // +2 for the double newline
      if (currentLength + paragraphLength > maxChunkSize && currentChunk.length > 0) {
        chunks.push(currentChunk.join('\n\n'));
        currentChunk = [];
        currentLength = 0;
      }
      currentChunk.push(paragraph);
      currentLength += paragraphLength;
    }

    if (currentChunk.length > 0) {
      chunks.push(currentChunk.join('\n\n'));
    }

    return chunks;
  }

  private generatePointId(): string {
    // Qdrant accepts only unsigned integers or UUIDs as point IDs, so use a
    // random UUID rather than a raw hex string (which Qdrant would reject).
    return crypto.randomUUID();
  }
}
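
For context, here is a minimal sketch of how this handler's tool might be invoked from an MCP client. The tool name 'update_repository', the server entry point 'build/index.js', and the repository name 'my-repo' are illustrative assumptions, not taken from this page; the argument fields mirror the properties the handler reads from `args` above.

// Minimal sketch (assumptions noted above) of calling the repository-update
// tool from an MCP client over stdio.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

async function main() {
  const transport = new StdioClientTransport({
    command: 'node',
    args: ['build/index.js'], // hypothetical server entry point
  });
  const client = new Client({ name: 'example-client', version: '1.0.0' });
  await client.connect(transport);

  // Only `name` is required by the handler; the other fields, if present,
  // overwrite the saved repo-configs/<name>.json before re-indexing.
  const result = await client.callTool({
    name: 'update_repository', // assumed tool name for this handler
    arguments: {
      name: 'my-repo', // must match an existing repo-configs/my-repo.json
      include: ['**/*.md', '**/*.ts'],
      exclude: ['node_modules/**'],
      chunkSize: 1000,
      watchMode: false,
    },
  });
  console.log(result.content);
  await client.close();
}

main().catch(console.error);

Note that the handler reports progress against the caller's progressToken when one is supplied in the call context, falling back to the request ID otherwise.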
