RagDocs MCP Server

import { QdrantClient } from '@qdrant/js-client-rest';
import { TextChunk } from './text-chunker.js';

/** Metadata stored in the payload of every chunk of an indexed document. */
export interface DocumentMetadata {
  url: string;
  title: string;
  domain: string;
  timestamp: string;
  contentType: string;
  wordCount: number;
  hasCode: boolean;
  chunkIndex: number;
  totalChunks: number;
}

/** Error raised by {@link QdrantWrapper} when an operation fails. */
export class QdrantError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'QdrantError';
  }
}

/** Payload keys needed to rebuild a {@link DocumentMetadata} from a stored point. */
const METADATA_KEYS = [
  'url',
  'title',
  'domain',
  'timestamp',
  'contentType',
  'wordCount',
  'hasCode',
  'chunkIndex',
  'totalChunks',
] as const;

/** Extracts a printable message from whatever value was thrown. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Returns `value` as a key/value record when it is a non-null object, else `undefined`. */
function asPayload(value: unknown): Record<string, unknown> | undefined {
  return value !== null && typeof value === 'object'
    ? (value as Record<string, unknown>)
    : undefined;
}

/** Coerces the metadata fields of a raw point payload into a {@link DocumentMetadata}. */
function toDocumentMetadata(payload: Record<string, unknown>): DocumentMetadata {
  return {
    url: String(payload.url),
    title: String(payload.title),
    domain: String(payload.domain),
    timestamp: String(payload.timestamp),
    contentType: String(payload.contentType),
    wordCount: Number(payload.wordCount),
    hasCode: Boolean(payload.hasCode),
    chunkIndex: Number(payload.chunkIndex),
    totalChunks: Number(payload.totalChunks),
  };
}

/**
 * Thin wrapper around a Qdrant client that stores, lists, removes and searches
 * documentation chunks in a single collection named `documentation`.
 */
export class QdrantWrapper {
  public client: QdrantClient;
  private readonly collectionName = 'documentation';
  private readonly vectorSize = 768; // Ollama nomic-embed-text size

  /**
   * @param url Qdrant base URL; falls back to `http://10.1.1.199:6333` when omitted or empty
   * @param apiKey Optional API key forwarded to the client
   */
  constructor(url?: string, apiKey?: string) {
    // NOTE(review): the fallback is a hard-coded private address; consider
    // sourcing it from configuration instead.
    this.client = new QdrantClient({
      url: url || 'http://10.1.1.199:6333',
      apiKey: apiKey,
      timeout: 10000, // Add timeout to help debug connection issues
    });
  }

  /**
   * Initializes the Qdrant collection if it doesn't exist, together with
   * payload indexes on `url`, `domain` and `timestamp`.
   * @throws QdrantError when the collection cannot be listed or created
   */
  async initializeCollection(): Promise<void> {
    try {
      const collections = await this.client.getCollections();
      const exists = collections.collections.some(c => c.name === this.collectionName);

      if (!exists) {
        await this.client.createCollection(this.collectionName, {
          vectors: {
            size: this.vectorSize,
            distance: 'Cosine',
          },
          optimizers_config: {
            default_segment_number: 2,
          },
          replication_factor: 1,
        });

        // Create indexes for efficient filtering
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'url',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'domain',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'timestamp',
          field_schema: 'datetime',
        });
      }
    } catch (error) {
      console.error('Qdrant initialization error:', error);
      if (error instanceof Error) {
        console.error('Error details:', {
          name: error.name,
          message: error.message,
          stack: error.stack,
        });
      }
      throw new QdrantError(
        `Failed to initialize Qdrant collection: ${errorMessage(error)}`
      );
    }
  }

  /**
   * Stores document chunks in the Qdrant collection
   * @param chunks Text chunks to store
   * @param embeddings Corresponding embeddings for each chunk
   * @param metadata Document metadata
   * @throws QdrantError when the chunk and embedding counts differ or the upsert fails
   */
  async storeDocumentChunks(
    chunks: TextChunk[],
    embeddings: number[][],
    metadata: Omit<DocumentMetadata, 'chunkIndex' | 'totalChunks'>
  ): Promise<void> {
    if (chunks.length !== embeddings.length) {
      throw new QdrantError('Number of chunks does not match number of embeddings');
    }
    if (chunks.length === 0) {
      // Nothing to write; skip the round-trip instead of sending an empty batch.
      return;
    }

    try {
      const points = chunks.map((chunk, index) => ({
        id: this.generatePointId(metadata.url, chunk.index),
        vector: embeddings[index],
        payload: {
          ...metadata,
          content: chunk.content,
          chunkIndex: chunk.index,
          totalChunks: chunks.length,
          chunkMetadata: chunk.metadata,
        },
      }));

      await this.client.upsert(this.collectionName, {
        wait: true,
        points,
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to store document chunks: ${errorMessage(error)}`
      );
    }
  }

  /**
   * Checks if a document already exists in the collection
   * @param url Document URL
   * @returns true if at least one point carries this URL
   * @throws QdrantError when the lookup fails
   */
  async documentExists(url: string): Promise<boolean> {
    try {
      const response = await this.client.scroll(this.collectionName, {
        filter: this.urlFilter(url),
        limit: 1,
      });
      return response.points.length > 0;
    } catch (error) {
      throw new QdrantError(
        `Failed to check document existence: ${errorMessage(error)}`
      );
    }
  }

  /**
   * Removes a document and all its chunks from the collection
   * @param url Document URL
   * @throws QdrantError when the delete request fails
   */
  async removeDocument(url: string): Promise<void> {
    try {
      await this.client.delete(this.collectionName, {
        filter: this.urlFilter(url),
        wait: true,
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to remove document: ${errorMessage(error)}`
      );
    }
  }

  /** Builds the filter matching every point whose `url` payload equals `url`. */
  private urlFilter(url: string) {
    return {
      must: [
        {
          key: 'url',
          match: {
            value: url,
          },
        },
      ],
    };
  }

  /**
   * Generates a point ID for a chunk from a 32-bit string hash of `url:chunkIndex`.
   *
   * NOTE(review): the ID space is only ~31 bits wide, so two different
   * (url, chunk) pairs can hash to the same ID and the later upsert would then
   * replace the earlier point. The algorithm is left untouched so that IDs of
   * already-stored points stay stable.
   * @param url Document URL
   * @param chunkIndex Chunk index
   * @returns Non-negative integer point ID
   */
  private generatePointId(url: string, chunkIndex: number): number {
    // Create a hash of the URL + chunk index
    const str = `${url}:${chunkIndex}`;
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }
    return Math.abs(hash);
  }

  /**
   * Gets the health status of the Qdrant server
   * @returns true if listing collections succeeds, false on any error
   */
  async isHealthy(): Promise<boolean> {
    try {
      await this.client.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Lists documents (one entry per document, taken from its chunk 0) with
   * skip/limit pagination and optional domain filtering.
   *
   * NOTE(review): `sortBy` and `sortOrder` are accepted for interface
   * compatibility but are not applied; results come back in scroll order.
   * @param options Listing options including pagination and filtering
   * @returns Total number of matching documents and the requested page
   * @throws QdrantError when counting or scrolling fails
   */
  async listDocuments(options: {
    offset?: number;
    limit?: number;
    domain?: string;
    sortBy?: 'timestamp' | 'title' | 'domain';
    sortOrder?: 'asc' | 'desc';
  } = {}): Promise<{ total: number; documents: DocumentMetadata[] }> {
    // `any` is kept here because the client's filter type is not visible in this file.
    const filter: any = {
      must: [
        {
          key: 'chunkIndex',
          match: { value: 0 }, // Only get first chunk to avoid duplicates
        },
      ],
    };

    if (options.domain) {
      filter.must.push({
        key: 'domain',
        match: { value: options.domain },
      });
    }

    // `||` (not `??`) so that 0 / NaN fall back to the defaults.
    const pageSize = options.limit || 20;
    const skip = Math.max(0, options.offset || 0);

    try {
      // Get total count first
      const countResponse = await this.client.count(this.collectionName, {
        filter,
      });

      // Qdrant's scroll `offset` is a start point ID, not a number of points to
      // skip, so the numeric offset cannot be forwarded as-is. Read the first
      // `skip + pageSize` points instead and drop the leading `skip`.
      const response = await this.client.scroll(this.collectionName, {
        filter,
        limit: skip + pageSize,
        with_payload: true,
        with_vector: false,
      });

      const documents = response.points.slice(skip).map(point => {
        const payload = asPayload(point.payload);
        if (!payload) {
          throw new QdrantError('Invalid payload structure in listed document');
        }
        return toDocumentMetadata(payload);
      });

      return {
        total: countResponse.count,
        documents,
      };
    } catch (error) {
      throw new QdrantError(
        `Failed to list documents: ${errorMessage(error)}`
      );
    }
  }

  /**
   * Performs a semantic search using vector similarity
   * @param queryVector Query embedding vector
   * @param options Search options: `limit` (default 5), `scoreThreshold`
   *   (default 0.7; an explicit 0 is honoured) and optional payload filters
   * @returns Array of search results with scores, at most `limit` entries
   * @throws QdrantError when the search fails or a returned hit has a
   *   non-object payload or lacks a required field
   */
  async searchSimilar(
    queryVector: number[],
    options: {
      limit?: number;
      scoreThreshold?: number;
      filters?: {
        domain?: string;
        hasCode?: boolean;
        after?: string;
        before?: string;
      };
    } = {}
  ): Promise<Array<DocumentMetadata & { score: number; content: string }>> {
    // `||` so that a limit of 0 falls back to the default page size.
    const limit = options.limit || 5;
    // `??` so that a caller can explicitly ask for a threshold of 0.
    const scoreThreshold = options.scoreThreshold ?? 0.7;
    // `any` is kept here because the client's filter type is not visible in this file.
    const filter: any = { must: [] };

    // Add filters if specified
    if (options.filters?.domain) {
      filter.must.push({
        key: 'domain',
        match: { value: options.filters.domain },
      });
    }

    if (options.filters?.hasCode !== undefined) {
      filter.must.push({
        key: 'hasCode',
        match: { value: options.filters.hasCode },
      });
    }

    if (options.filters?.after) {
      filter.must.push({
        key: 'timestamp',
        range: { gte: options.filters.after },
      });
    }

    if (options.filters?.before) {
      filter.must.push({
        key: 'timestamp',
        range: { lte: options.filters.before },
      });
    }

    try {
      const response = await this.client.search(this.collectionName, {
        vector: queryVector,
        limit: Math.ceil(limit * 1.5), // Request extra results for post-filtering
        score_threshold: scoreThreshold,
        filter: filter.must.length > 0 ? filter : undefined,
        with_payload: true,
      });

      // Trim first so that only hits that are actually returned get validated.
      return response.slice(0, limit).map(hit => {
        const payload = asPayload(hit.payload);
        if (!payload) {
          throw new QdrantError('Invalid payload structure in search result');
        }

        // Validate on the raw payload: after String()/Number()/Boolean()
        // coercion a missing field would no longer be detectable.
        const hasMissingField = [...METADATA_KEYS, 'content'].some(
          key => payload[key] === undefined || payload[key] === null
        );
        if (hasMissingField) {
          throw new QdrantError('Missing required fields in search result');
        }

        return {
          score: hit.score || 0,
          ...toDocumentMetadata(payload),
          content: String(payload.content),
        };
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to perform search: ${errorMessage(error)}`
      );
    }
  }
}