Vectorize

Official
  • src
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  Tool,
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import dotenv from 'dotenv';
import {
  Configuration,
  ExtractionApi,
  FilesApi,
  PipelinesApi,
} from '@vectorize-io/vectorize-client';

dotenv.config();

/** Tool definition: document retrieval from a pipeline. */
const RETRIEVAL_TOOL: Tool = {
  name: 'retrieve',
  description: 'Retrieve documents from a Vectorize pipeline.',
  inputSchema: {
    type: 'object',
    properties: {
      pipelineId: {
        type: 'string',
        description: 'The pipeline ID to retrieve documents from.',
      },
      question: {
        type: 'string',
        description: 'The term to search for.',
      },
      k: {
        type: 'number',
        description: 'The number of documents to retrieve.',
      },
    },
    required: ['pipelineId', 'question', 'k'],
  },
};

/** Tool definition: deep research on a pipeline. */
const DEEP_RESEARCH_TOOL: Tool = {
  name: 'deep-research',
  description: 'Generate a deep research on a Vectorize pipeline.',
  inputSchema: {
    type: 'object',
    properties: {
      pipelineId: {
        type: 'string',
        description: 'The pipeline ID to retrieve documents from.',
      },
      query: {
        type: 'string',
        description: 'The deep research query.',
      },
      webSearch: {
        type: 'boolean',
        description: 'Whether to perform a web search.',
      },
    },
    required: ['pipelineId', 'query', 'webSearch'],
  },
};

/** Tool definition: text extraction and chunking of an uploaded document. */
const EXTRACTION_TOOL: Tool = {
  name: 'extract',
  description: 'Perform text extraction and chunking on a document.',
  inputSchema: {
    type: 'object',
    properties: {
      base64Document: {
        type: 'string',
        description: 'Document encoded in base64.',
      },
      contentType: {
        type: 'string',
        description: 'Document content type.',
      },
    },
    required: ['base64Document', 'contentType'],
  },
};

/** Delay between two polls of a long-running Vectorize job. */
const POLL_INTERVAL_MS = 1000;

// Server implementation
const server = new Server(
  {
    name: 'vectorize-mcp',
    version: '0.1.0',
  },
  {
    capabilities: {
      tools: {},
      logging: {},
    },
  }
);

// Credentials are read from the environment (after dotenv has loaded .env).
const VECTORIZE_ORG_ID = process.env.VECTORIZE_ORG_ID;
const VECTORIZE_TOKEN = process.env.VECTORIZE_TOKEN;

// Both values are mandatory: refuse to start without them.
if (!VECTORIZE_ORG_ID || !VECTORIZE_TOKEN) {
  console.error(
    'Error: VECTORIZE_TOKEN and VECTORIZE_ORG_ID environment variable are required'
  );
  process.exit(1);
}

const vectorizeApi = new Configuration({
  accessToken: VECTORIZE_TOKEN,
});

/**
 * Sends a logging notification to the connected client.
 *
 * The send is fire-and-forget; a failed send is reported on stderr instead of
 * surfacing as an unhandled promise rejection.
 */
function logMessage(level: 'info' | 'error', data: unknown): void {
  server.sendLoggingMessage({ level, data }).catch((err: unknown) => {
    console.error('Failed to send logging message:', err);
  });
}

/**
 * Reads a required string argument from a tool call.
 *
 * Numbers are accepted and stringified to stay compatible with the previous
 * `value + ''` coercion; anything else (including a missing value, which used
 * to become the literal string "undefined") is rejected.
 *
 * @throws Error when the argument is missing or not a string/finite number.
 */
function readStringArg(args: Record<string, unknown>, key: string): string {
  const raw = args[key];
  if (typeof raw === 'string') {
    return raw;
  }
  if (typeof raw === 'number' && Number.isFinite(raw)) {
    return String(raw);
  }
  throw new Error(`Missing or invalid argument: ${key}`);
}

/**
 * Reads a required numeric argument from a tool call.
 *
 * Numeric strings are still accepted; values that would previously have been
 * forwarded as NaN are rejected.
 *
 * @throws Error when the argument is missing or not a finite number.
 */
function readNumberArg(args: Record<string, unknown>, key: string): number {
  const raw = args[key];
  const value =
    typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    throw new Error(`Missing or invalid argument: ${key}`);
  }
  return value;
}

/**
 * Reads a boolean argument from a tool call.
 *
 * Real booleans pass through. The strings "false" and "0" are treated as
 * false (plain `Boolean("false")` is true); every other value keeps its
 * JavaScript truthiness, so a missing value is false.
 */
function readBooleanArg(args: Record<string, unknown>, key: string): boolean {
  const raw = args[key];
  if (typeof raw === 'string') {
    const normalized = raw.trim().toLowerCase();
    return raw !== '' && normalized !== 'false' && normalized !== '0';
  }
  return Boolean(raw);
}

/**
 * Repeatedly invokes `fetchResult` until the returned object has a truthy
 * `ready` field, sleeping POLL_INTERVAL_MS between attempts.
 *
 * NOTE(review): there is no upper bound on the wait, matching the previous
 * behaviour; consider adding a deadline if jobs can stall indefinitely.
 */
async function pollUntilReady<T extends { ready?: unknown }>(
  fetchResult: () => Promise<T>
): Promise<T> {
  for (;;) {
    const result = await fetchResult();
    if (result.ready) {
      return result;
    }
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }
}

server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [RETRIEVAL_TOOL, EXTRACTION_TOOL, DEEP_RESEARCH_TOOL],
}));

/**
 * Retrieves up to `k` documents matching `question` from a pipeline.
 *
 * @returns A tool result whose single text item is the JSON-serialized API response.
 */
async function performRetrieval(
  orgId: string,
  pipelineId: string,
  question: string,
  k: number
) {
  const pipelinesApi = new PipelinesApi(vectorizeApi);
  const response = await pipelinesApi.retrieveDocuments({
    organization: orgId,
    pipeline: pipelineId,
    retrieveDocumentsRequest: {
      question,
      numResults: k,
    },
  });

  return {
    content: [{ type: 'text', text: JSON.stringify(response) }],
    isError: false,
  };
}

/**
 * Uploads a base64-encoded document, starts an extraction on it and waits for
 * the extraction to finish.
 *
 * @returns A tool result whose single text item is the JSON-serialized extraction data.
 * @throws Error when the upload request is not OK or the extraction reports failure.
 */
async function performExtraction(
  orgId: string,
  base64Document: string,
  contentType: string
) {
  // Step 1: ask the API for an upload slot.
  const filesApi = new FilesApi(vectorizeApi);
  const startResponse = await filesApi.startFileUpload({
    organization: orgId,
    startFileUploadRequest: {
      name: 'My File',
      contentType,
    },
  });

  // Step 2: PUT the decoded bytes to the URL returned by the API.
  const fileBuffer = Buffer.from(base64Document, 'base64');
  const fetchResponse = await fetch(startResponse.uploadUrl, {
    method: 'PUT',
    body: fileBuffer,
    headers: {
      'Content-Type': contentType,
    },
  });
  if (!fetchResponse.ok) {
    throw new Error(`Failed to upload file: ${fetchResponse.statusText}`);
  }

  // Step 3: start the extraction and poll until it is ready.
  const extractionApi = new ExtractionApi(vectorizeApi);
  const response = await extractionApi.startExtraction({
    organization: orgId,
    startExtractionRequest: {
      fileId: startResponse.fileId,
      chunkSize: 512,
    },
  });
  const extractionId = response.extractionId;

  const result = await pollUntilReady(() =>
    extractionApi.getExtractionResult({
      organization: orgId,
      extractionId,
    })
  );

  if (!result.data?.success) {
    throw new Error(`Extraction failed: ${result.data?.error}`);
  }
  return {
    content: [{ type: 'text', text: JSON.stringify(result.data) }],
    isError: false,
  };
}

/**
 * Starts a deep research job on a pipeline and waits for it to finish.
 *
 * @returns A tool result whose single text item is the markdown report.
 * @throws Error when the research job reports failure.
 */
async function performDeepResearch(
  orgId: string,
  pipelineId: string,
  query: string,
  webSearch: boolean
) {
  const pipelinesApi = new PipelinesApi(vectorizeApi);
  const response = await pipelinesApi.startDeepResearch({
    organization: orgId,
    pipeline: pipelineId,
    startDeepResearchRequest: {
      query,
      webSearch,
    },
  });
  const researchId = response.researchId;

  const result = await pollUntilReady(() =>
    pipelinesApi.getDeepResearchResult({
      organization: orgId,
      pipeline: pipelineId,
      researchId,
    })
  );

  if (!result.data?.success) {
    throw new Error(`Deep research failed: ${result.data?.error}`);
  }
  return {
    content: [{ type: 'text', text: result.data.markdown }],
    isError: false,
  };
}

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  try {
    const { name, arguments: args } = request.params;

    // Log incoming request with timestamp
    logMessage(
      'info',
      `[${new Date().toISOString()}] Received request for tool: ${name}`
    );

    if (!args) {
      throw new Error('No arguments provided');
    }

    switch (name) {
      case 'retrieve': {
        return await performRetrieval(
          VECTORIZE_ORG_ID,
          readStringArg(args, 'pipelineId'),
          readStringArg(args, 'question'),
          readNumberArg(args, 'k')
        );
      }
      case 'extract': {
        return await performExtraction(
          VECTORIZE_ORG_ID,
          readStringArg(args, 'base64Document'),
          readStringArg(args, 'contentType')
        );
      }
      case 'deep-research': {
        return await performDeepResearch(
          VECTORIZE_ORG_ID,
          readStringArg(args, 'pipelineId'),
          readStringArg(args, 'query'),
          readBooleanArg(args, 'webSearch')
        );
      }
      default:
        throw new Error(`Tool not found: ${name}`);
    }
  } catch (error) {
    // NOTE(review): the raw arguments are echoed into the log; for `extract`
    // this includes the full base64 document.
    logMessage('error', {
      message: `Request failed: ${
        error instanceof Error ? error.message : String(error)
      }`,
      tool: request.params.name,
      arguments: request.params.arguments,
      timestamp: new Date().toISOString(),
    });
    // Re-throw so the failure is still reported to the caller.
    throw error;
  }
});

/** Connects the server to a stdio transport and announces that it is running. */
async function runServer() {
  const transport = new StdioServerTransport();
  await server.connect(transport);

  // Now that we're connected, we can send logging messages
  logMessage('info', 'Vectorize MCP Server initialized successfully');
  // The org id is always set here: startup exits earlier when it is missing.
  logMessage('info', `Configuration: Organization ID: ${VECTORIZE_ORG_ID}`);

  // stderr is used for diagnostics alongside the stdio transport.
  console.error('Vectorize MCP Server running');
}

runServer().catch((error) => {
  console.error('Fatal error running server:', error);
  process.exit(1);
});