MCP-Ragdocs

Apache 2.0

128

mcp-ragdocs
src

index.ts•16.8 kB

#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ErrorCode,
  ListToolsRequestSchema,
  McpError,
} from '@modelcontextprotocol/sdk/types.js';
import { QdrantClient } from '@qdrant/js-client-rest';
import { chromium } from 'playwright';
import type { Browser } from 'playwright';
import * as cheerio from 'cheerio';
import axios from 'axios';
import crypto from 'crypto';
import { EmbeddingService } from './embeddings.js';

// Environment variables for configuration
// NOTE(review): OLLAMA_URL is not referenced anywhere in this file; kept in case it is relied on elsewhere.
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434';
// Force using IP address to avoid hostname resolution issues
const QDRANT_URL = 'http://127.0.0.1:6333';
const COLLECTION_NAME = 'documentation';
const EMBEDDING_PROVIDER = process.env.EMBEDDING_PROVIDER || 'ollama';
const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL;
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

/** Target size, in characters, of each stored text chunk. */
const CHUNK_SIZE = 1000;
/** Number of results returned by `search_documentation` when no valid `limit` is given. */
const DEFAULT_SEARCH_LIMIT = 5;
/** Page size used when scrolling through the collection to list sources. */
const SCROLL_PAGE_SIZE = 100;
/** Candidate content containers, most specific first; the first one with text wins. */
const CONTENT_SELECTORS = ['main', 'article', '.content', '.documentation', 'body'];
/** Tools that need the Qdrant collection to exist before they run. */
const COLLECTION_TOOLS: ReadonlySet<string> = new Set([
  'add_documentation',
  'search_documentation',
  'list_sources',
]);

interface QdrantCollectionConfig {
  params: {
    vectors: {
      size: number;
      distance: string;
    };
  };
}

interface QdrantCollectionInfo {
  config: QdrantCollectionConfig;
}

interface DocumentChunk {
  text: string;
  url: string;
  title: string;
  timestamp: string;
}

interface DocumentPayload extends DocumentChunk {
  _type: 'DocumentChunk';
  [key: string]: unknown;
}

/** Raw tool arguments as delivered by the MCP request (may be absent). */
type ToolArguments = Record<string, unknown> | undefined;

/** Shape of every tool response produced by this server. */
type ToolResult = {
  content: Array<{ type: 'text'; text: string }>;
  isError?: boolean;
};

/** Offset type accepted by `QdrantClient.scroll`, derived from the client's own signature. */
type ScrollOffset = NonNullable<Parameters<QdrantClient['scroll']>[1]>['offset'];

/**
 * Runtime check that a Qdrant payload was written by this server,
 * i.e. carries the `_type` tag and all string fields of a document chunk.
 */
function isDocumentPayload(payload: unknown): payload is DocumentPayload {
  if (!payload || typeof payload !== 'object') return false;
  const p = payload as Partial<DocumentPayload>;
  return (
    p._type === 'DocumentChunk' &&
    typeof p.text === 'string' &&
    typeof p.url === 'string' &&
    typeof p.title === 'string' &&
    typeof p.timestamp === 'string'
  );
}

/** Builds a single-text-item tool response; `isError` is only set on failures. */
function textResult(text: string, isError = false): ToolResult {
  const content: ToolResult['content'] = [{ type: 'text', text }];
  return isError ? { content, isError: true } : { content };
}

/** Returns `value` when it is a string, otherwise `undefined`. */
function optionalString(value: unknown): string | undefined {
  return typeof value === 'string' ? value : undefined;
}

/**
 * MCP server exposing tools to ingest documentation pages into a Qdrant
 * collection and to search / list what has been stored.
 */
class RagDocsServer {
  private readonly server: Server;
  private qdrantClient!: QdrantClient;
  private browser: Browser | undefined;
  private embeddingService!: EmbeddingService;

  constructor() {
    this.server = new Server(
      {
        name: 'mcp-ragdocs',
        version: '0.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    // Error handling
    this.server.onerror = (error) => console.error('[MCP Error]', error);
    process.on('SIGINT', async () => {
      try {
        await this.cleanup();
      } catch (error) {
        console.error('Error during shutdown:', error);
      } finally {
        // Always exit, even when cleanup fails, so SIGINT cannot leave the process hanging.
        process.exit(0);
      }
    });
  }

  /**
   * Verifies that the Qdrant client can list collections.
   * @throws McpError (InternalError) when the request fails.
   */
  private async testQdrantConnection(): Promise<void> {
    try {
      const response = await this.qdrantClient.getCollections();
      console.error('Successfully connected to Qdrant. Collections:', response.collections);
    } catch (error) {
      console.error('Failed initial Qdrant connection test:', error);
      if (error instanceof Error) {
        throw new McpError(
          ErrorCode.InternalError,
          `Failed to establish initial connection to Qdrant server: ${error.message}`
        );
      }
      throw new McpError(
        ErrorCode.InternalError,
        'Failed to establish initial connection to Qdrant server: Unknown error'
      );
    }
  }

  /**
   * Checks Qdrant reachability, then creates the Qdrant client and the
   * embedding service and registers the tool handlers.
   * @throws McpError (InternalError) when Qdrant cannot be reached.
   */
  private async init(): Promise<void> {
    // Test connection with direct axios call
    const axiosInstance = axios.create({
      baseURL: QDRANT_URL,
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
      },
    });

    try {
      const response = await axiosInstance.get('/collections');
      console.error('Successfully connected to Qdrant:', response.data);
    } catch (error) {
      console.error('Failed to connect to Qdrant:', error);
      throw new McpError(
        ErrorCode.InternalError,
        'Failed to establish initial connection to Qdrant server'
      );
    }

    // Initialize Qdrant client with minimal configuration
    this.qdrantClient = new QdrantClient({ url: QDRANT_URL });

    // Initialize embedding service from environment configuration
    this.embeddingService = EmbeddingService.createFromConfig({
      provider: EMBEDDING_PROVIDER as 'ollama' | 'openai',
      model: EMBEDDING_MODEL,
      apiKey: OPENAI_API_KEY,
    });

    this.setupToolHandlers();
  }

  /** Closes the browser (if one was launched) and then the MCP server. */
  private async cleanup(): Promise<void> {
    try {
      if (this.browser) {
        await this.browser.close();
        this.browser = undefined;
      }
    } finally {
      // Close the server even when shutting down the browser fails.
      await this.server.close();
    }
  }

  /** Launches Chromium on first use and returns the shared browser instance. */
  private async initBrowser(): Promise<Browser> {
    if (!this.browser) {
      this.browser = await chromium.launch();
    }
    return this.browser;
  }

  /** Generates an embedding vector for `text` with the active embedding service. */
  private async getEmbeddings(text: string): Promise<number[]> {
    return this.embeddingService.generateEmbeddings(text);
  }

  /**
   * Ensures the collection exists and its vector size matches the active
   * embedding service, creating or recreating it when necessary.
   * @throws McpError (InternalError) on any failure.
   */
  private async initCollection(): Promise<void> {
    try {
      // First ensure we can connect to Qdrant
      await this.testQdrantConnection();
      const requiredVectorSize = this.embeddingService.getVectorSize();
      await this.ensureCollection(requiredVectorSize);
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      throw new McpError(
        ErrorCode.InternalError,
        `Unexpected error initializing Qdrant: ${error}`
      );
    }
  }

  /**
   * Creates the collection when missing; recreates it (dropping stored points)
   * when its vector size is unknown or differs from `requiredVectorSize`.
   * Any failure is logged and reported as a generic InternalError.
   */
  private async ensureCollection(requiredVectorSize: number): Promise<void> {
    try {
      // Check if collection exists
      const { collections } = await this.qdrantClient.getCollections();
      const exists = collections.some((c) => c.name === COLLECTION_NAME);

      if (!exists) {
        console.error(`Creating new collection with vector size ${requiredVectorSize}`);
        await this.qdrantClient.createCollection(COLLECTION_NAME, {
          vectors: {
            size: requiredVectorSize,
            distance: 'Cosine',
          },
        });
        return;
      }

      // Get collection info to check vector size
      const collectionInfo = (await this.qdrantClient.getCollection(
        COLLECTION_NAME
      )) as QdrantCollectionInfo;
      const currentVectorSize = collectionInfo.config?.params?.vectors?.size;

      if (!currentVectorSize) {
        console.error('Could not determine current vector size, recreating collection...');
        await this.recreateCollection(requiredVectorSize);
        return;
      }

      if (currentVectorSize !== requiredVectorSize) {
        console.error(
          `Vector size mismatch: collection=${currentVectorSize}, required=${requiredVectorSize}`
        );
        await this.recreateCollection(requiredVectorSize);
      }
    } catch (error) {
      console.error('Failed to initialize collection:', error);
      throw new McpError(
        ErrorCode.InternalError,
        'Failed to initialize Qdrant collection. Please check server logs for details.'
      );
    }
  }

  /**
   * Deletes the collection and creates it again with `vectorSize`.
   * @throws McpError (InternalError) when either step fails.
   */
  private async recreateCollection(vectorSize: number): Promise<void> {
    try {
      console.error('Recreating collection with new vector size...');
      await this.qdrantClient.deleteCollection(COLLECTION_NAME);
      await this.qdrantClient.createCollection(COLLECTION_NAME, {
        vectors: {
          size: vectorSize,
          distance: 'Cosine',
        },
      });
      console.error(`Collection recreated with new vector size ${vectorSize}`);
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to recreate collection: ${error}`
      );
    }
  }

  /**
   * Renders `url` in the browser, extracts the page text and splits it into
   * chunks that share the page's URL, title and a fetch timestamp.
   * @throws McpError (InternalError) when navigation or extraction fails.
   */
  private async fetchAndProcessUrl(url: string): Promise<DocumentChunk[]> {
    const browser = await this.initBrowser();
    const page = await browser.newPage();

    try {
      await page.goto(url, { waitUntil: 'networkidle' });
      const content = await page.content();
      const $ = cheerio.load(content);

      // Remove script, style and noscript elements
      $('script').remove();
      $('style').remove();
      $('noscript').remove();

      const title = $('title').text() || url;

      // Take the first container that has text. Selecting all candidates at once
      // would emit nested content twice (e.g. <main> on its own and again as part
      // of <body>), because .text() concatenates the text of every matched element.
      let mainContent = '';
      for (const selector of CONTENT_SELECTORS) {
        const candidate = $(selector).first().text();
        if (candidate.trim().length > 0) {
          mainContent = candidate;
          break;
        }
      }

      const timestamp = new Date().toISOString();
      return this.chunkText(mainContent, CHUNK_SIZE).map((text) => ({
        text,
        url,
        title,
        timestamp,
      }));
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to fetch URL ${url}: ${error}`
      );
    } finally {
      await page.close();
    }
  }

  /**
   * Splits `text` on whitespace and groups the words into space-joined chunks.
   * A chunk is closed as soon as its length reaches `maxChunkSize`, so a chunk
   * may exceed the limit by at most the word that closed it.
   * Whitespace-only or empty input yields no chunks.
   */
  private chunkText(text: string, maxChunkSize: number): string[] {
    // Drop the empty tokens that split() produces for leading/trailing whitespace.
    const words = text.split(/\s+/).filter((word) => word.length > 0);
    const chunks: string[] = [];
    let current: string[] = [];
    let currentLength = 0;

    for (const word of words) {
      // Running length of current.join(' ') without re-joining on every word.
      currentLength += word.length + (current.length > 0 ? 1 : 0);
      current.push(word);

      if (currentLength >= maxChunkSize) {
        chunks.push(current.join(' '));
        current = [];
        currentLength = 0;
      }
    }

    if (current.length > 0) {
      chunks.push(current.join(' '));
    }

    return chunks;
  }

  /** Returns a random 128-bit identifier as 32 hex characters. */
  private generatePointId(): string {
    return crypto.randomBytes(16).toString('hex');
  }

  /** Registers the tool listing and the tool dispatch handlers on the MCP server. */
  private setupToolHandlers(): void {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'add_documentation',
          description: 'Add documentation from a URL to the RAG database',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'URL of the documentation to fetch',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'search_documentation',
          description: 'Search through stored documentation',
          inputSchema: {
            type: 'object',
            properties: {
              query: {
                type: 'string',
                description: 'Search query',
              },
              limit: {
                type: 'number',
                description: 'Maximum number of results to return',
                default: 5,
              },
            },
            required: ['query'],
          },
        },
        {
          name: 'list_sources',
          description: 'List all documentation sources currently stored',
          inputSchema: {
            type: 'object',
            properties: {},
          },
        },
        {
          name: 'test_ollama',
          description: 'Test embeddings functionality',
          inputSchema: {
            type: 'object',
            properties: {
              text: {
                type: 'string',
                description: 'Text to generate embeddings for',
              },
              provider: {
                type: 'string',
                description: 'Embedding provider to use (ollama or openai)',
                enum: ['ollama', 'openai'],
                default: 'ollama',
              },
              apiKey: {
                type: 'string',
                description: 'OpenAI API key (required if provider is openai)',
              },
              model: {
                type: 'string',
                description: 'Model to use for embeddings',
              },
            },
            required: ['text'],
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      const { name, arguments: args } = request.params;

      // Storage-backed tools need the collection in place before they run.
      if (COLLECTION_TOOLS.has(name)) {
        await this.initCollection();
      }

      switch (name) {
        case 'add_documentation':
          return this.handleAddDocumentation(args);
        case 'search_documentation':
          return this.handleSearchDocumentation(args);
        case 'list_sources':
          return this.handleListSources();
        case 'test_ollama':
          return this.handleTestEmbeddings(args);
        default:
          throw new McpError(
            ErrorCode.MethodNotFound,
            `Unknown tool: ${name}`
          );
      }
    });
  }

  /**
   * Tool `test_ollama`: builds an embedding service from the given arguments,
   * embeds `text` with it and, on success, makes it the server's active service.
   * The collection is then re-initialised, which recreates it (dropping stored
   * points) when the new vector size differs from the existing one.
   * @throws McpError (InvalidParams) when `text` is missing or not a string.
   */
  private async handleTestEmbeddings(args: ToolArguments): Promise<ToolResult> {
    const text = args?.text;
    if (typeof text !== 'string' || text.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'Text is required');
    }

    const provider = optionalString(args?.provider) || 'ollama';
    const model = optionalString(args?.model);
    const apiKey = optionalString(args?.apiKey);

    try {
      // Create a new embedding service instance with the requested configuration
      const candidateService = EmbeddingService.createFromConfig({
        provider: provider as 'ollama' | 'openai',
        apiKey,
        model,
      });

      const embedding = await candidateService.generateEmbeddings(text);
      const modelName =
        model || (provider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small');

      // If test is successful, update the server's embedding service
      this.embeddingService = candidateService;

      // Reinitialize collection with new vector size
      await this.initCollection();

      return textResult(
        `Successfully configured ${provider} embeddings (${modelName}).\nVector size: ${embedding.length}\nQdrant collection updated to match new vector size.`
      );
    } catch (error) {
      return textResult(`Failed to test embeddings: ${error}`, true);
    }
  }

  /**
   * Tool `add_documentation`: fetches `url`, embeds every chunk and stores each
   * one as a point in the collection.
   * @throws McpError (InvalidParams) when `url` is missing or not a string.
   */
  private async handleAddDocumentation(args: ToolArguments): Promise<ToolResult> {
    const url = args?.url;
    if (typeof url !== 'string' || url.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'URL is required');
    }

    try {
      const chunks = await this.fetchAndProcessUrl(url);

      for (const chunk of chunks) {
        const embedding = await this.getEmbeddings(chunk.text);
        const payload: DocumentPayload = {
          ...chunk,
          _type: 'DocumentChunk',
        };

        await this.qdrantClient.upsert(COLLECTION_NAME, {
          wait: true,
          points: [
            {
              id: this.generatePointId(),
              vector: embedding,
              payload,
            },
          ],
        });
      }

      return textResult(
        `Successfully added documentation from ${url} (${chunks.length} chunks processed)`
      );
    } catch (error) {
      return textResult(`Failed to add documentation: ${error}`, true);
    }
  }

  /**
   * Tool `search_documentation`: embeds `query` and returns the closest stored
   * chunks formatted as markdown links with score and content.
   * `limit` falls back to the default unless it is a positive integer.
   * @throws McpError (InvalidParams) when `query` is missing or not a string.
   */
  private async handleSearchDocumentation(args: ToolArguments): Promise<ToolResult> {
    const query = args?.query;
    if (typeof query !== 'string' || query.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'Query is required');
    }

    const requestedLimit = args?.limit;
    const limit =
      typeof requestedLimit === 'number' &&
      Number.isInteger(requestedLimit) &&
      requestedLimit > 0
        ? requestedLimit
        : DEFAULT_SEARCH_LIMIT;

    try {
      const queryEmbedding = await this.getEmbeddings(query);

      const searchResults = await this.qdrantClient.search(COLLECTION_NAME, {
        vector: queryEmbedding,
        limit,
        with_payload: true,
      });

      const formattedResults = searchResults
        .map((result) => {
          if (!isDocumentPayload(result.payload)) {
            throw new Error('Invalid payload type');
          }
          return `[${result.payload.title}](${result.payload.url})\nScore: ${result.score}\nContent: ${result.payload.text}\n`;
        })
        .join('\n---\n');

      return textResult(formattedResults || 'No results found.');
    } catch (error) {
      return textResult(`Search failed: ${error}`, true);
    }
  }

  /**
   * Tool `list_sources`: returns the distinct "title (url)" pairs of all stored
   * document chunks, one per line.
   */
  private async handleListSources(): Promise<ToolResult> {
    try {
      const sources = new Set<string>();
      let offset: ScrollOffset = undefined;

      // scroll() is paginated: keep requesting pages until no continuation
      // offset is returned, otherwise only the first page of points is seen.
      do {
        const page = await this.qdrantClient.scroll(COLLECTION_NAME, {
          with_payload: true,
          limit: SCROLL_PAGE_SIZE,
          offset,
        });

        for (const point of page.points) {
          if (isDocumentPayload(point.payload)) {
            sources.add(`${point.payload.title} (${point.payload.url})`);
          }
        }

        // The response and request offset types are declared separately by the client.
        offset = page.next_page_offset as ScrollOffset;
      } while (offset != null);

      return textResult(
        Array.from(sources).join('\n') || 'No documentation sources found.'
      );
    } catch (error) {
      return textResult(`Failed to list sources: ${error}`, true);
    }
  }

  /** Initialises the server and connects it to stdio; exits with code 1 on failure. */
  async run(): Promise<void> {
    try {
      await this.init();
      const transport = new StdioServerTransport();
      await this.server.connect(transport);
      console.error('RAG Docs MCP server running on stdio');
    } catch (error) {
      console.error('Failed to initialize server:', error);
      process.exit(1);
    }
  }
}

const server = new RagDocsServer();
server.run().catch(console.error);

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/qpd-v/mcp-ragdocs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.