RagDocs MCP Server
- src
- tools
import OpenAI from 'openai';
import { URLProcessor, URLProcessingError } from './url-processor.js';
import { ContentFetcher, ContentFetchError } from './content-fetcher.js';
import { TextChunker } from './text-chunker.js';
import { QdrantWrapper, QdrantError } from './qdrant-client.js';
/**
 * Error type thrown by the add-documentation workflow.
 *
 * Besides the usual message it carries a `step` label naming the stage of
 * the workflow that failed (for example `'health_check'` or
 * `'embedding_generation'`), so callers can tell failures apart without
 * parsing the message text.
 */
export class AddDocumentationError extends Error {
  /** Label of the workflow stage that raised this error. */
  public readonly step: string;

  /**
   * @param message Human-readable description of the failure
   * @param step Label of the workflow stage that failed
   */
  constructor(message: string, step: string) {
    super(message);
    this.step = step;
    this.name = 'AddDocumentationError';
  }
}
/**
 * Summary returned by `AddDocumentationTool.addDocument` after a document
 * has been fetched, chunked, embedded and stored.
 */
export interface AddDocumentationResult {
/** Normalized form of the URL the document was stored under. */
url: string;
/** Title reported by the content fetcher for the document. */
title: string;
/** Number of text chunks the document was split into. */
chunks: number;
/** Word count taken from the fetched content's metadata. */
wordCount: number;
}
/**
 * Tool that ingests a single documentation page into the RAG store.
 *
 * The page is fetched, split into chunks, embedded through the OpenAI
 * embeddings API and written to Qdrant. If the URL is already stored, the
 * old version is replaced.
 */
export class AddDocumentationTool {
  private readonly openai: OpenAI;
  private readonly qdrant: QdrantWrapper;

  /**
   * @param openaiApiKey API key passed to the OpenAI client; must be non-empty
   * @param qdrantUrl Optional URL forwarded to the Qdrant wrapper
   * @throws Error when `openaiApiKey` is empty
   */
  constructor(openaiApiKey: string, qdrantUrl?: string) {
    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }
    this.openai = new OpenAI({
      apiKey: openaiApiKey,
    });
    this.qdrant = new QdrantWrapper(qdrantUrl);
  }

  /**
   * Adds a document to the RAG system, replacing any stored version of the
   * same (normalized) URL.
   *
   * @param url URL of the document to add
   * @returns Normalized URL, title, chunk count and word count of the stored document
   * @throws AddDocumentationError when Qdrant is unhealthy, the URL is
   *   invalid, no chunks could be produced, embedding generation fails, or
   *   an unexpected error occurs (the `step` field names the failing stage)
   * @throws URLProcessingError | ContentFetchError | QdrantError rethrown
   *   unchanged when raised by the corresponding collaborator
   */
  async addDocument(url: string): Promise<AddDocumentationResult> {
    try {
      // Check Qdrant health before doing any expensive work
      const isHealthy = await this.qdrant.isHealthy();
      if (!isHealthy) {
        throw new AddDocumentationError(
          'Qdrant server is not available',
          'health_check'
        );
      }

      // Initialize collection if needed
      await this.qdrant.initializeCollection();

      // Process URL
      const processedUrl = URLProcessor.processURL(url);
      if (!processedUrl.isValid) {
        throw new AddDocumentationError('Invalid URL format', 'url_validation');
      }
      const normalizedUrl = processedUrl.normalizedUrl;

      // Fetch content
      const content = await ContentFetcher.fetchContent(normalizedUrl);

      // Chunk content
      const chunks = TextChunker.chunkText(content.content, {
        maxChunkSize: 1500, // Leave room for metadata in context window
        minChunkSize: 100,
        overlap: 200,
        respectCodeBlocks: true,
      });

      // Nothing to embed or store: fail with a clear message instead of
      // sending an empty request to the embeddings endpoint.
      if (chunks.length === 0) {
        throw new AddDocumentationError(
          'No content chunks could be extracted from the document',
          'chunking'
        );
      }

      // Generate embeddings for each chunk
      const embeddings = await this.generateEmbeddings(
        chunks.map(chunk => chunk.content)
      );

      // Only now, with the replacement fully prepared, drop the previous
      // version. Removing it earlier would lose the stored document whenever
      // fetching, chunking or embedding failed. (A failure while storing can
      // still leave the URL without a document; the window is just narrower.)
      const exists = await this.qdrant.documentExists(normalizedUrl);
      if (exists) {
        await this.qdrant.removeDocument(normalizedUrl);
      }

      // Store in Qdrant
      await this.qdrant.storeDocumentChunks(chunks, embeddings, {
        url: normalizedUrl,
        title: content.title,
        domain: processedUrl.domain,
        timestamp: content.timestamp,
        contentType: content.metadata.contentType,
        wordCount: content.metadata.wordCount,
        hasCode: content.metadata.hasCode,
      });

      return {
        url: normalizedUrl,
        title: content.title,
        chunks: chunks.length,
        wordCount: content.metadata.wordCount,
      };
    } catch (error: unknown) {
      // Errors that already identify their origin pass through untouched.
      if (
        error instanceof URLProcessingError ||
        error instanceof ContentFetchError ||
        error instanceof QdrantError ||
        error instanceof AddDocumentationError
      ) {
        throw error;
      }
      throw new AddDocumentationError(
        `Unexpected error: ${AddDocumentationTool.describeError(error)}`,
        'unknown'
      );
    }
  }

  /**
   * Generates embeddings for text chunks using OpenAI's API
   *
   * @param chunks Array of text chunks
   * @returns One embedding per chunk, in the order the API returned them
   * @throws AddDocumentationError (step `embedding_generation`) when the API
   *   call fails or returns a different number of embeddings than chunks
   */
  private async generateEmbeddings(chunks: string[]): Promise<number[][]> {
    let embeddings: number[][];
    try {
      const response = await this.openai.embeddings.create({
        model: 'text-embedding-ada-002',
        input: chunks,
      });
      embeddings = response.data.map(item => item.embedding);
    } catch (error: unknown) {
      throw new AddDocumentationError(
        `Failed to generate embeddings: ${AddDocumentationTool.describeError(error)}`,
        'embedding_generation'
      );
    }

    // Chunks and embeddings are stored pairwise, so a count mismatch would
    // silently corrupt the index; surface it here instead.
    if (embeddings.length !== chunks.length) {
      throw new AddDocumentationError(
        `Failed to generate embeddings: expected ${chunks.length} embeddings but received ${embeddings.length}`,
        'embedding_generation'
      );
    }
    return embeddings;
  }

  /**
   * Extracts a printable message from a caught value, which may be anything
   * (not only `Error` instances).
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}