Vectorize

Official
by vectorize-io
JavaScript
MIT License
123
Reddit Discord
OverviewInspectSchema Related Servers Reviews Score
Need Help?View Source Code Report Issue
src/index.ts
#!/usr/bin/env node

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  Tool,
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';

import dotenv from 'dotenv';
import { Configuration, ExtractionApi, FilesApi, PipelinesApi } from '@vectorize-io/vectorize-client';

dotenv.config();

/**
 * Tool descriptor for the `retrieve` tool.
 *
 * The input schema takes a pipeline ID, the search question, and the number
 * of documents to return (`k`); all three are listed as required.
 */
const RETRIEVAL_TOOL: Tool = {
  name: 'retrieve',
  description: 'Retrieve documents from a Vectorize pipeline.',
  inputSchema: {
    type: 'object',
    properties: {
      pipelineId: { type: 'string', description: 'The pipeline ID to retrieve documents from.' },
      question: { type: 'string', description: 'The term to search for.' },
      k: { type: 'number', description: 'The number of documents to retrieve.' },
    },
    required: ['pipelineId', 'question', 'k'],
  },
};


/**
 * Tool descriptor for the `deep-research` tool.
 *
 * The input schema takes a pipeline ID, the research query, and a boolean
 * flag selecting whether a web search is performed; all three are listed as
 * required.
 */
const DEEP_RESEARCH_TOOL: Tool = {
  name: 'deep-research',
  description: 'Generate a deep research on a Vectorize pipeline.',
  inputSchema: {
    type: 'object',
    properties: {
      pipelineId: { type: 'string', description: 'The pipeline ID to retrieve documents from.' },
      query: { type: 'string', description: 'The deep research query.' },
      webSearch: { type: 'boolean', description: 'Whether to perform a web search.' },
    },
    required: ['pipelineId', 'query', 'webSearch'],
  },
};

/**
 * Tool descriptor for the `extract` tool.
 *
 * The input schema takes the document as a base64 string together with its
 * content type; both are listed as required.
 */
const EXTRACTION_TOOL: Tool = {
  name: 'extract',
  description: 'Perform text extraction and chunking on a document.',
  inputSchema: {
    type: 'object',
    properties: {
      base64Document: { type: 'string', description: 'Document encoded in base64.' },
      contentType: { type: 'string', description: 'Document content type.' },
    },
    required: ['base64Document', 'contentType'],
  },
};

// MCP server instance. It identifies itself as `vectorize-mcp` 0.1.0 and
// declares the `tools` and `logging` capabilities.
const server = new Server(
  { name: 'vectorize-mcp', version: '0.1.0' },
  { capabilities: { tools: {}, logging: {} } }
);

// Vectorize credentials, read from the process environment.
const VECTORIZE_ORG_ID = process.env.VECTORIZE_ORG_ID;
const VECTORIZE_TOKEN = process.env.VECTORIZE_TOKEN;

// Both values are mandatory: report the problem and abort startup when either
// one is unset or empty.
if (!(VECTORIZE_ORG_ID && VECTORIZE_TOKEN)) {
  console.error('Error: VECTORIZE_TOKEN and VECTORIZE_ORG_ID environment variable are required');
  process.exit(1);
}

// Client configuration carrying the access token; passed to each API wrapper
// constructed in this file.
const vectorizeApi = new Configuration({ accessToken: VECTORIZE_TOKEN });

// Tool discovery: answer ListTools requests with the three tool descriptors.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const tools = [RETRIEVAL_TOOL, EXTRACTION_TOOL, DEEP_RESEARCH_TOOL];
  return { tools };
});

/**
 * Retrieve documents from a pipeline for a given question.
 *
 * @param orgId - Organization the pipeline belongs to.
 * @param pipelineId - Pipeline to query.
 * @param question - Search text sent with the request.
 * @param k - Number of results to ask for (sent as `numResults`).
 * @returns A tool result whose single text item is the JSON-serialized API
 *   response, with `isError` set to `false`.
 */
async function performRetrieval(
  orgId: string,
  pipelineId: string,
  question: string,
  k: number
) {
  const client = new PipelinesApi(vectorizeApi);
  const retrieveDocumentsRequest = { question: `${question}`, numResults: k };

  const documents = await client.retrieveDocuments({
    organization: orgId,
    pipeline: `${pipelineId}`,
    retrieveDocumentsRequest,
  });

  const serialized = JSON.stringify(documents);
  return { content: [{ type: 'text', text: serialized }], isError: false };
}


/**
 * Upload a base64-encoded document and run text extraction on it.
 *
 * Steps: request an upload slot, PUT the decoded bytes to the returned upload
 * URL, start an extraction for the uploaded file (chunk size 512), then poll
 * the extraction result once per second until it reports ready.
 *
 * Note: the polling loop has no upper bound; it only ends when the API
 * reports the extraction as ready or a request throws.
 *
 * @param orgId - Organization under which the file is uploaded and extracted.
 * @param base64Document - Document contents encoded as base64.
 * @param contentType - MIME type sent both to the upload-start call and as the
 *   `Content-Type` header of the upload itself.
 * @returns A tool result whose single text item is the JSON-serialized
 *   extraction data, with `isError` set to `false`.
 * @throws Error when the upload response is not OK, or when the finished
 *   extraction does not report success.
 */
async function performExtraction(
  orgId: string,
  base64Document: string,
  contentType: string
) {
  const filesApi = new FilesApi(vectorizeApi);
  const startResponse = await filesApi.startFileUpload({
    organization: orgId,
    startFileUploadRequest: {
      name: "My File",
      contentType
    }
  });

  const fileBuffer = Buffer.from(base64Document, 'base64');
  const fetchResponse = await fetch(startResponse.uploadUrl, {
    method: 'PUT',
    body: fileBuffer,
    headers: {
      'Content-Type': contentType
    },
  });
  if (!fetchResponse.ok) {
    // statusText may be empty (HTTP/2 responses carry no reason phrase), so
    // always include the numeric status to keep the error diagnosable.
    const reason = fetchResponse.statusText ? ` ${fetchResponse.statusText}` : '';
    throw new Error(`Failed to upload file: HTTP ${fetchResponse.status}${reason}`);
  }

  const extractionApi = new ExtractionApi(vectorizeApi);
  const response = await extractionApi.startExtraction({
    organization: orgId,
    startExtractionRequest: {
      fileId: startResponse.fileId,
      chunkSize: 512,
    }
  });
  const extractionId = response.extractionId;

  // Poll until the extraction is ready; every exit from the loop is a return
  // or a throw.
  // eslint-disable-next-line no-constant-condition
  while (true) {
    const result = await extractionApi.getExtractionResult({
      organization: orgId,
      extractionId: extractionId,
    });
    if (result.ready) {
      if (result.data?.success) {
        return {
          content: [{ type: 'text', text: JSON.stringify(result.data) }],
          isError: false,
        };
      } else {
        throw new Error(`Extraction failed: ${result.data?.error}`);
      }
    } else {
      // Not ready yet: wait one second before asking again.
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
  }
}



/**
 * Start a deep research job on a pipeline and wait for its result.
 *
 * After starting the job, the result endpoint is polled once per second until
 * it reports ready. The loop has no upper bound; it only ends when the API
 * reports the research as ready or a request throws.
 *
 * @param orgId - Organization the pipeline belongs to.
 * @param pipelineId - Pipeline the research runs against.
 * @param query - The research query.
 * @param webSearch - Whether the job should also perform a web search.
 * @returns A tool result whose single text item is the markdown produced by
 *   the research, with `isError` set to `false`.
 * @throws Error when the finished research does not report success.
 */
async function performDeepResearch(
  orgId: string,
  pipelineId: string,
  query: string,
  webSearch: boolean
) {
  const pipelinesApi = new PipelinesApi(vectorizeApi);
  const response = await pipelinesApi.startDeepResearch({
    organization: orgId,
    pipeline: pipelineId,
    startDeepResearchRequest: {
      query,
      webSearch
    }
  });
  const researchId = response.researchId;

  // Poll until the research is ready; every exit from the loop is a return or
  // a throw, so no `break` is needed.
  // eslint-disable-next-line no-constant-condition
  while (true) {
    const result = await pipelinesApi.getDeepResearchResult({
      organization: orgId,
      pipeline: pipelineId,
      researchId: researchId
    });
    if (result.ready) {
      if (result.data?.success) {
        return {
          content: [{ type: 'text', text: result.data.markdown }],
          isError: false,
        };
      } else {
        throw new Error(`Deep research failed: ${result.data?.error}`);
      }
    } else {
      // Not ready yet: wait one second before asking again.
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
  }
}


/**
 * Read a required tool argument and coerce it to a string.
 *
 * Present values of any type are still stringified, but a missing value is
 * rejected instead of being turned into the literal text "undefined".
 *
 * @throws Error when the argument is `undefined` or `null`.
 */
function requireStringArg(args: Record<string, unknown>, key: string): string {
  const value = args[key];
  if (value === undefined || value === null) {
    throw new Error(`Missing required argument: ${key}`);
  }
  return String(value);
}

/**
 * Read a required tool argument and coerce it to a finite number.
 *
 * Numeric strings are accepted; missing, empty, or non-numeric values are
 * rejected instead of being forwarded as `NaN` or `0`.
 *
 * @throws Error when the argument is missing or does not convert to a finite
 *   number.
 */
function requireNumberArg(args: Record<string, unknown>, key: string): number {
  const value = args[key];
  if (value === undefined || value === null || value === '') {
    throw new Error(`Missing required argument: ${key}`);
  }
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Argument "${key}" must be a finite number`);
  }
  return parsed;
}

// Tool invocation: dispatch a CallTool request to the matching implementation.
// Failures are logged to the client with the tool name and arguments, then
// rethrown so the caller still sees the error.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  try {
    const { name, arguments: args } = request.params;

    // Log incoming request with timestamp
    server.sendLoggingMessage({
      level: 'info',
      data: `[${new Date().toISOString()}] Received request for tool: ${name}`,
    });

    if (!args) {
      throw new Error('No arguments provided');
    }

    switch (name) {
      case 'retrieve': {
        return await performRetrieval(
          VECTORIZE_ORG_ID,
          requireStringArg(args, 'pipelineId'),
          requireStringArg(args, 'question'),
          requireNumberArg(args, 'k')
        );
      }
      case 'extract': {
        return await performExtraction(
          VECTORIZE_ORG_ID,
          requireStringArg(args, 'base64Document'),
          requireStringArg(args, 'contentType')
        );
      }
      case 'deep-research': {
        return await performDeepResearch(
          VECTORIZE_ORG_ID,
          requireStringArg(args, 'pipelineId'),
          requireStringArg(args, 'query'),
          // Kept lenient on purpose: an omitted flag continues to mean "no
          // web search" rather than becoming an error.
          Boolean(args.webSearch)
        );
      }
      default:
        throw new Error(`Tool not found: ${name}`);
    }
  } catch (error) {
    server.sendLoggingMessage({
      level: 'error',
      data: {
        message: `Request failed: ${
          error instanceof Error ? error.message : String(error)
        }`,
        tool: request.params.name,
        arguments: request.params.arguments,
        timestamp: new Date().toISOString(),
      },
    });
    throw error;
  }
});

/**
 * Connect the server to a stdio transport and announce that it is running.
 *
 * Two info-level logging messages are sent after the connection is
 * established, followed by a line on stderr.
 */
async function runServer() {
  const logInfo = (data: string) => server.sendLoggingMessage({ level: 'info', data });

  const stdio = new StdioServerTransport();
  await server.connect(stdio);

  // Logging messages are only sent once the transport is connected.
  logInfo('Vectorize MCP Server initialized successfully');
  logInfo(`Configuration: Organization ID: ${VECTORIZE_ORG_ID || 'default'}`);

  console.error('Vectorize MCP Server running');
}

// Entry point: start the server; if startup rejects, report the failure on
// stderr and exit with status 1.
runServer().catch((startupError) => {
  console.error('Fatal error running server:', startupError);
  process.exit(1);
});