RagDocs MCP Server
- src
  - handlers
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { ApiClient } from '../api-client.js';
import { DocumentChunk, ToolResult } from '../types.js';
import * as cheerio from 'cheerio';
import crypto from 'crypto';
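
// Qdrant collection that stores documentation chunks, and how many chunks are
// embedded and upserted per batch.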
const COLLECTION_NAME = 'documentation';
const BATCH_SIZE = 100;
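
/**
 * Tool handler that adds documentation to the vector store: it fetches a URL,
 * extracts the readable text, splits it into chunks, embeds each chunk, and
 * upserts the results into the Qdrant collection.
 */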
export class AddDocumentationHandler extends BaseHandler {
  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
  }
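
  /**
   * Validates the tool arguments, fetches and chunks the URL, and writes the
   * embedded chunks to Qdrant in batches of BATCH_SIZE.
   */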
  async handle(args: any): Promise<ToolResult> {
    if (!args.url || typeof args.url !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'URL is required');
    }

    try {
      const chunks = await this.fetchAndProcessUrl(args.url);

      // Process chunks in batches for better performance
      for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
        const batch = chunks.slice(i, i + BATCH_SIZE);
        const points = await Promise.all(
          batch.map(async (chunk) => {
            const embedding = await this.apiClient.getEmbeddings(chunk.text);
            return {
              id: this.generatePointId(),
              vector: embedding,
              payload: {
                ...chunk,
                _type: 'DocumentChunk' as const,
              } as Record<string, unknown>,
            };
          })
        );

        // Upsert the embedded batch, mapping Qdrant auth/connectivity failures to MCP errors
        try {
          await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, {
            wait: true,
            points,
          });
        } catch (error) {
          if (error instanceof Error) {
            if (error.message.includes('unauthorized')) {
              throw new McpError(
                ErrorCode.InvalidRequest,
                'Failed to authenticate with Qdrant cloud while adding documents'
              );
            } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
              throw new McpError(
                ErrorCode.InternalError,
                'Connection to Qdrant cloud failed while adding documents'
              );
            }
          }
          throw error;
        }
      }

      return {
        content: [
          {
            type: 'text',
            text: `Successfully added documentation from ${args.url} (${chunks.length} chunks processed in ${Math.ceil(chunks.length / BATCH_SIZE)} batches)`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to add documentation: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
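
  /**
   * Loads the page in the shared browser instance, strips non-content tags,
   * extracts the title and main text, and splits the text into DocumentChunk records.
   */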
  private async fetchAndProcessUrl(url: string): Promise<DocumentChunk[]> {
    await this.apiClient.initBrowser();
    const page = await this.apiClient.browser.newPage();

    try {
      await page.goto(url, { waitUntil: 'networkidle' });
      const content = await page.content();
      const $ = cheerio.load(content);

      // Remove script, style, and noscript tags before extracting text
      $('script').remove();
      $('style').remove();
      $('noscript').remove();

      // Extract main content
      const title = $('title').text() || url;
      const mainContent = $('main, article, .content, .documentation, body').text();

      // Split content into chunks
      const chunks = this.chunkText(mainContent, 1000);

      return chunks.map(chunk => ({
        text: chunk,
        url,
        title,
        timestamp: new Date().toISOString(),
      }));
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to fetch URL ${url}: ${error}`
      );
    } finally {
      await page.close();
    }
  }
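
  /**
   * Greedy word-based chunking: words are accumulated until the joined chunk
   * reaches maxChunkSize characters, so a chunk may overshoot the limit by up to
   * one word.
   */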
  private chunkText(text: string, maxChunkSize: number): string[] {
    const words = text.split(/\s+/);
    const chunks: string[] = [];
    let currentChunk: string[] = [];

    for (const word of words) {
      currentChunk.push(word);
      const currentLength = currentChunk.join(' ').length;

      if (currentLength >= maxChunkSize) {
        chunks.push(currentChunk.join(' '));
        currentChunk = [];
      }
    }

    if (currentChunk.length > 0) {
      chunks.push(currentChunk.join(' '));
    }

    return chunks;
  }
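
  /**
   * Qdrant accepts only unsigned integers or UUID strings as point IDs.
   */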
  private generatePointId(): string {
    return crypto.randomUUID();
  }
}
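
// Usage sketch: one possible way to wire this handler into the server's CallTool
// dispatch. The ApiClient construction and the tool name 'add_documentation' are
// assumptions for illustration, not confirmed by this file.
//
// import { CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js';
//
// const handler = new AddDocumentationHandler(server, apiClient);
// server.setRequestHandler(CallToolRequestSchema, async (request) => {
//   if (request.params.name === 'add_documentation') {
//     return handler.handle(request.params.arguments);
//   }
//   throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
// });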