#!/usr/bin/env node
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import {
extractText,
extractPageRange,
getPdfInfo,
searchInPdf,
getWordCount,
getWordFrequency,
} from "./pdf.js";
// Single MCP server instance; every tool in this file is registered on it.
const server = new McpServer({ name: "mcp-pdf-tools", version: "1.0.0" });
// Tool: pdf_info — renders the fields returned by getPdfInfo() as one
// "Label: value" line per field.
server.tool(
  "pdf_info",
  "Get metadata and statistics about a PDF file (pages, author, title, text length)",
  {
    file_path: z.string().describe("Absolute path to the PDF file"),
  },
  async ({ file_path }) => {
    const meta = await getPdfInfo(file_path);

    // Falsy metadata values (missing or empty) are shown as "(none)".
    const report = [
      ["Title", meta.title || "(none)"],
      ["Author", meta.author || "(none)"],
      ["Pages", meta.pages],
      ["Text length", `${meta.textLength.toLocaleString()} characters`],
      ["Creator", meta.creator || "(none)"],
      ["Producer", meta.producer || "(none)"],
    ]
      .map(([label, value]) => `${label}: ${value}`)
      .join("\n");

    return { content: [{ type: "text" as const, text: report }] };
  },
);
// Tool: pdf_extract_text — returns the text produced by extractText(), capped
// at `max_chars` UTF-16 code units, prefixed with the page count.
server.tool(
  "pdf_extract_text",
  "Extract all text content from a PDF file",
  {
    file_path: z.string().describe("Absolute path to the PDF file"),
    // A negative cap would make slice() count from the END of the string and
    // silently drop the tail instead of limiting the length, so only
    // non-negative whole numbers are accepted.
    max_chars: z
      .number()
      .int()
      .min(0)
      .optional()
      .default(50000)
      .describe("Maximum characters to return"),
  },
  async ({ file_path, max_chars }) => {
    const result = await extractText(file_path);
    let text = result.text;

    if (text.length > max_chars) {
      let end = max_chars;
      if (end > 0) {
        // If the cut lands right after a high surrogate, back up one unit so
        // the output never ends in half of a surrogate pair.
        const lastUnit = text.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          end -= 1;
        }
      }
      text = `${text.slice(0, end)}\n\n... (truncated)`;
    }

    return {
      content: [
        {
          type: "text" as const,
          text: `Extracted from ${result.pages} page(s):\n\n${text}`,
        },
      ],
    };
  },
);
// Tool: pdf_extract_pages — returns the text extractPageRange() yields for the
// inclusive, 1-indexed page range [start_page, end_page].
server.tool(
  "pdf_extract_pages",
  "Extract text from a specific page range of a PDF",
  {
    file_path: z.string().describe("Absolute path to the PDF file"),
    // Page numbers are documented as 1-indexed, so zero, negative and
    // fractional values are rejected at the schema level.
    start_page: z.number().int().min(1).describe("Start page (1-indexed)"),
    end_page: z.number().int().min(1).describe("End page (inclusive)"),
  },
  async ({ file_path, start_page, end_page }) => {
    // A reversed range cannot describe any pages; report it as a tool error
    // rather than passing it on to the extractor.
    if (end_page < start_page) {
      return {
        content: [
          {
            type: "text" as const,
            text: `Invalid page range: end_page (${end_page}) is before start_page (${start_page})`,
          },
        ],
        isError: true,
      };
    }

    const text = await extractPageRange(file_path, start_page, end_page);
    return {
      content: [
        {
          type: "text" as const,
          text: `Pages ${start_page}-${end_page}:\n\n${text}`,
        },
      ],
    };
  },
);
// Tool: pdf_search — runs searchInPdf() over the extracted text and lists up
// to `max_results` matches with their line number and context.
server.tool(
  "pdf_search",
  "Search for text within a PDF file",
  {
    file_path: z.string().describe("Absolute path to the PDF file"),
    query: z.string().describe("Text to search for"),
    // Must be a positive whole number: 0 or a negative value would make
    // slice() discard real matches and could yield a false "No matches found".
    max_results: z
      .number()
      .int()
      .min(1)
      .optional()
      .default(20)
      .describe("Maximum results to return"),
  },
  async ({ file_path, query, max_results }) => {
    const result = await extractText(file_path);
    const allMatches = searchInPdf(result.text, query);

    if (allMatches.length === 0) {
      return {
        content: [{ type: "text" as const, text: `No matches found for "${query}"` }],
      };
    }

    const shown = allMatches.slice(0, max_results);
    // Report the true total; when the list is cut short, say so explicitly
    // instead of presenting the truncated count as the number of matches.
    const header =
      shown.length < allMatches.length
        ? `Found ${allMatches.length} match(es) for "${query}" (showing first ${shown.length}):`
        : `Found ${allMatches.length} match(es) for "${query}":`;

    const text = [
      header,
      "",
      ...shown.map((m, i) => `${i + 1}. Line ${m.line}:\n${m.context}\n`),
    ].join("\n");

    return { content: [{ type: "text" as const, text }] };
  },
);
// Tool: pdf_word_stats — reports the total word count, the page count and the
// `top_n` entries returned by getWordFrequency().
server.tool(
  "pdf_word_stats",
  "Get word count and top word frequencies from a PDF",
  {
    file_path: z.string().describe("Absolute path to the PDF file"),
    top_n: z.number().optional().default(20).describe("Number of top words to return"),
  },
  async ({ file_path, top_n }) => {
    const { text: body, pages } = await extractText(file_path);
    const total = getWordCount(body);
    const ranked = getWordFrequency(body, top_n);

    // Fixed header first, then one line per ranked word.
    const header = `Total words: ${total.toLocaleString()}\nPages: ${pages}\n\nTop words:`;
    const rows = ranked.map(({ word, count }) => ` ${word}: ${count}`);
    const text = [header, ...rows].join("\n");

    return { content: [{ type: "text" as const, text }] };
  },
);
/**
 * Entry point: connects the server to a stdio transport.
 *
 * @returns {Promise<void>} Resolves once `server.connect()` has completed;
 *   rejects if the connection fails.
 */
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
}

main().catch((error) => {
  // console.error writes to stderr, keeping stdout free for the stdio transport.
  console.error(error);
  // Signal the failure to the parent process; previously a startup error was
  // logged but the process still ended with exit code 0.
  process.exitCode = 1;
});