#!/usr/bin/env node
"use strict";
// Interop helper emitted for default imports: reuses an already-installed
// helper when `this` exposes one, otherwise defines it locally.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Values flagged as ES modules are passed through untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so callers can uniformly read `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
const zod_1 = require("zod");
const documentLoader_1 = require("./documentLoader");
const textSplitter_1 = require("./textSplitter");
const vectorStore_1 = require("./vectorStore");
const db_1 = require("./db");
const path_1 = __importDefault(require("path"));
const crypto_1 = __importDefault(require("crypto"));
// Values handed to the text splitter during ingestion.
const CHUNK_SIZE = 1000;
const CHUNK_OVERLAP = 200;
// Number of hits requested from each search call.
const TOP_K = 5;
// True when OPENAI_API_KEY is set to a non-empty value; evaluated once at module load.
const usingEmbeddings = Boolean(process.env.OPENAI_API_KEY);
// MCP server instance on which every tool in this file is registered.
const server = new mcp_js_1.McpServer({ name: "antigravity-pdf-mcp", version: "1.0.0" });
// Shared ingestion logic
/**
 * Load a file, split it into chunks, vectorize the chunks and persist them.
 *
 * Progress is reported to the MCP client through logging notifications.
 * When a document with the same path is already registered, nothing is
 * loaded or written and a notice containing the existing ID is returned.
 *
 * The document record is written only after loading, splitting and
 * vectorization have all succeeded, so a failure in any of those steps
 * (for example a rejected embedding request) leaves no record behind that
 * would make a retry report "already exists".
 *
 * @param {string} filePath - Path of the file to ingest; also used as the lookup key for duplicates.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Tool result describing the outcome.
 * @throws {Error} When no chunks are produced, when the number of returned
 *   embeddings does not match the number of chunks, or when any of the
 *   loader, splitter, vector store or database helpers throws.
 */
async function ingestDocument(filePath) {
    const filename = path_1.default.basename(filePath);
    // Forward one progress message to the connected client.
    const report = (level, data) => server.sendLoggingMessage({ level, data });
    await report("info", `Starting ingestion for ${filename}...`);
    // Check if document already exists
    const existingDoc = db_1.dbOps.getDocumentByPath(filePath);
    if (existingDoc) {
        await report("notice", `Document already exists: ${filename}`);
        return {
            content: [
                {
                    type: "text",
                    text: `Document already exists in library: ${filename} (ID: ${existingDoc.id})`
                }
            ]
        };
    }
    await report("info", `Loading and parsing document...`);
    const pages = await (0, documentLoader_1.loadDocument)(filePath);
    await report("info", `Splitting text into chunks...`);
    const chunks = (0, textSplitter_1.recursiveChunkText)(pages, CHUNK_SIZE, CHUNK_OVERLAP);
    // Registering a document without any chunks would make it unsearchable
    // while still blocking re-ingestion of the same path.
    if (chunks.length === 0) {
        throw new Error(`No text content could be extracted from ${filename}`);
    }
    if (usingEmbeddings) {
        await report("info", `Generating embeddings for ${chunks.length} chunks...`);
        const apiKey = process.env.OPENAI_API_KEY;
        const texts = chunks.map((c) => c.text);
        // Embed in batches if needed, but for now simple batch
        const embeddings = await (0, vectorStore_1.embedTextsOpenAI)(texts, apiKey);
        // Embeddings are matched to chunks by index, so the counts must agree.
        if (embeddings.length !== chunks.length) {
            throw new Error(`Expected ${chunks.length} embeddings but received ${embeddings.length}`);
        }
        embeddings.forEach((embedding, i) => {
            chunks[i].embedding = embedding;
        });
    }
    else {
        await report("info", `Computing TF-IDF vectors...`);
        // compute tf-idf vectors (local to this doc for now)
        (0, vectorStore_1.computeTfIdfVectors)(chunks);
    }
    await report("info", `Saving to database...`);
    // Save document metadata only now that every fallible preparation step is done.
    const documentId = crypto_1.default.randomUUID();
    db_1.dbOps.addDocument({
        id: documentId,
        path: filePath,
        filename
    });
    // Add chunks to DB
    (0, vectorStore_1.addChunks)(chunks, documentId);
    await report("notice", `Ingestion complete for ${filename}`);
    return {
        content: [
            {
                type: "text",
                text: `Successfully ingested document: ${filename}. Total chunks: ${chunks.length}. Using embeddings: ${usingEmbeddings}`
            }
        ]
    };
}
// Tool: ingest_pdf — deprecated alias that delegates to ingestDocument().
// Failures are returned as an error result instead of being thrown.
server.tool("ingest_pdf", "Ingest a PDF file into the knowledge base. (Deprecated: use ingest_document for all file types)", {
    path: zod_1.z.string().describe("Absolute path to the PDF file to ingest")
}, async ({ path: filePath }) => {
    try {
        return await ingestDocument(filePath);
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error; avoid reporting "undefined".
        const reason = error instanceof Error ? error.message : String(error);
        return {
            content: [{ type: "text", text: `Error ingesting PDF: ${reason}` }],
            isError: true
        };
    }
});
// Tool: ingest_document — delegates to ingestDocument() for the given path.
// Failures are returned as an error result instead of being thrown.
server.tool("ingest_document", "Ingest a document (PDF, TXT, MD) into the knowledge base.", {
    path: zod_1.z.string().describe("Absolute path to the file to ingest")
}, async ({ path: filePath }) => {
    try {
        return await ingestDocument(filePath);
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error; avoid reporting "undefined".
        const reason = error instanceof Error ? error.message : String(error);
        return {
            content: [{ type: "text", text: `Error ingesting document: ${reason}` }],
            isError: true
        };
    }
});
// Tool: list_documents — renders one line per document returned by dbOps.listDocuments().
// The document ID is included when the row carries one, because
// query_knowledge_base accepts a document_id filter and callers otherwise
// have no way to look that ID up.
server.tool("list_documents", "List all documents currently in the knowledge base.", {}, async () => {
    const docs = db_1.dbOps.listDocuments();
    if (docs.length === 0) {
        return {
            content: [{ type: "text", text: "Library is empty." }]
        };
    }
    const list = docs
        .map((d) => {
            // NOTE(review): assumes listDocuments() rows expose `id` like getDocumentByPath() rows do;
            // when they do not, the line falls back to the previous format.
            const idPart = d.id != null ? ` [ID: ${d.id}]` : "";
            return `- ${d.filename}${idPart} (Added: ${d.added_at})`;
        })
        .join("\n");
    return {
        content: [{ type: "text", text: `Current Documents:\n${list}` }]
    };
});
// Tool: reset_library — invokes the vector store's clearStore() and confirms.
server.tool("reset_library", "Clear all documents and chunks from the knowledge base.", {}, async () => {
    const { clearStore } = vectorStore_1;
    clearStore();
    const confirmation = { type: "text", text: "Library reset successfully." };
    return { content: [confirmation] };
});
// Tool: query_knowledge_base — searches the stored chunks and returns the
// top hits as text, each prefixed with its score and, when available, its page.
// Uses hybridSearch when an OpenAI key is configured and searchByTfIdf otherwise.
// Failures are returned as an error result instead of being thrown.
server.tool("query_knowledge_base", "Query the ingested knowledge base for relevant information.", {
    query: zod_1.z.string().describe("The query string to search for"),
    document_id: zod_1.z.string().optional().describe("Optional: The ID of a specific document to search within")
}, async ({ query, document_id }) => {
    try {
        if ((0, vectorStore_1.getStoreSize)() === 0) {
            return {
                content: [
                    {
                        type: "text",
                        // Ingestion is not limited to PDFs, so the hint names documents in general.
                        text: "Knowledge base is empty. Please ingest a document first."
                    }
                ],
                isError: true
            };
        }
        // Restrict the search to one document only when an ID was supplied.
        const filter = document_id ? { documentId: document_id } : undefined;
        // Use hybrid search for better results when embeddings are available.
        const hits = usingEmbeddings
            ? await (0, vectorStore_1.hybridSearch)(query, TOP_K, filter, process.env.OPENAI_API_KEY)
            : (0, vectorStore_1.searchByTfIdf)(query, TOP_K, filter);
        const answer = hits
            .map(({ c, score }) => {
                const citation = c.pageNumber ? ` [Page ${c.pageNumber}]` : "";
                return `[Score: ${score.toFixed(4)}]${citation} ${c.text.trim()}`;
            })
            .join('\n\n---\n\n');
        return {
            content: [
                {
                    type: "text",
                    text: answer || "No relevant matches found."
                }
            ]
        };
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error; avoid reporting "undefined".
        const reason = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: "text",
                    text: `Error querying knowledge base: ${reason}`
                }
            ],
            isError: true
        };
    }
});
/**
 * Initialize the database, terminating the process with exit code 1 when
 * initialization throws. Diagnostics go to stderr.
 */
function initializeDatabaseOrExit() {
    try {
        (0, db_1.initDb)();
        console.error("Database initialized successfully.");
    }
    catch (initFailure) {
        console.error("Failed to initialize database:", initFailure);
        console.error("Current working directory:", process.cwd());
        process.exit(1);
    }
}
/**
 * Start the server: prepare the database, then connect the MCP server to a
 * stdio transport. Status messages are written with console.error.
 *
 * @returns {Promise<void>} Resolves once the server is connected.
 */
async function main() {
    initializeDatabaseOrExit();
    const stdioTransport = new stdio_js_1.StdioServerTransport();
    await server.connect(stdioTransport);
    console.error("Antigravity PDF MCP Server running on Stdio");
}
// Last-resort handler: report a rejection from main() on stderr and exit with code 1.
function exitOnFatalError(fatal) {
    console.error("Fatal error in main():", fatal);
    process.exit(1);
}
// Entry point.
main().catch(exitOnFatalError);