PDF Reader MCP Server

index.js•9.52 KiB

#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import fs from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";
import pdfParse from "pdf-parse";

// Read the version from the package.json that sits next to this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const packageJson = JSON.parse(
  await fs.readFile(path.join(__dirname, "package.json"), "utf-8")
);
const VERSION = packageJson.version;

/**
 * Escape every regular-expression metacharacter in `value` so the result can
 * be embedded in a RegExp source and matched literally.
 *
 * @param {string} value - Raw text supplied by the caller.
 * @returns {string} The text with regex metacharacters backslash-escaped.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Read a required, non-empty string argument from a tool-call argument object.
 *
 * @param {object} args - Arguments object received with the tool call.
 * @param {string} key - Name of the argument to read.
 * @returns {string} The argument value.
 * @throws {Error} If the argument is missing, not a string, or empty.
 */
function requireStringArg(args, key) {
  const value = args?.[key];
  if (typeof value !== "string" || value.length === 0) {
    throw new Error(
      `Missing or invalid "${key}" argument: expected a non-empty string`
    );
  }
  return value;
}

/**
 * MCP Server for PDF Text Extraction
 * Provides efficient tools for reading and searching PDF files
 */
class PDFReaderServer {
  /**
   * Create the underlying MCP server, then register the request handlers and
   * the error/shutdown hooks.
   */
  constructor() {
    this.server = new Server(
      {
        name: "pdf-reader-mcp-server",
        version: VERSION,
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.setupHandlers();
    this.setupErrorHandling();
  }

  /**
   * Log server-level errors to stderr and close the server on SIGINT before
   * exiting with status 0.
   */
  setupErrorHandling() {
    this.server.onerror = (error) => {
      console.error("[MCP Error]", error);
    };

    process.on("SIGINT", async () => {
      await this.server.close();
      process.exit(0);
    });
  }

  /**
   * Register the two request handlers: one that lists the available tools and
   * one that dispatches a tool call to the matching `handle*` method.
   */
  setupHandlers() {
    // List available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: "read-pdf",
          description:
            "Extract text from a PDF file. Returns the full text content of the PDF with optional page filtering and text cleaning.",
          inputSchema: {
            $schema: "https://json-schema.org/draft/2020-12/schema",
            type: "object",
            properties: {
              file: {
                type: "string",
                description: "Path to the PDF file to extract text from",
              },
              pages: {
                type: "string",
                description:
                  "Page range (e.g., '1-5', '1,3,5', 'all'). Default: 'all'. Filtering is not applied yet; the full text is returned together with a note.",
              },
              clean_text: {
                type: "boolean",
                description:
                  "Clean and normalize extracted text. Default: false",
              },
              include_metadata: {
                type: "boolean",
                description: "Include PDF metadata in output. Default: true",
              },
            },
            required: ["file"],
            additionalProperties: false,
          },
        },
        {
          name: "search-pdf",
          description:
            "Search for specific text within a PDF file. Returns matching text with context and line numbers.",
          inputSchema: {
            $schema: "https://json-schema.org/draft/2020-12/schema",
            type: "object",
            properties: {
              file: {
                type: "string",
                description: "Path to the PDF file to search in",
              },
              query: {
                type: "string",
                description: "Text to search for",
              },
              case_sensitive: {
                type: "boolean",
                description: "Case sensitive search. Default: false",
              },
              whole_word: {
                type: "boolean",
                description: "Match whole words only. Default: false",
              },
            },
            required: ["file", "query"],
            additionalProperties: false,
          },
        },
        {
          name: "pdf-metadata",
          description:
            "Extract metadata from a PDF file including title, author, page count, creation date, etc.",
          inputSchema: {
            $schema: "https://json-schema.org/draft/2020-12/schema",
            type: "object",
            properties: {
              file: {
                type: "string",
                description: "Path to the PDF file to get metadata from",
              },
            },
            required: ["file"],
            additionalProperties: false,
          },
        },
      ],
    }));

    // Handle tool calls
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      try {
        // `arguments` may be absent from the request; default to an empty
        // object so the handlers can report a clear validation error.
        const { name, arguments: args = {} } = request.params;

        switch (name) {
          case "read-pdf":
            return await this.handleReadPdf(args);
          case "search-pdf":
            return await this.handleSearchPdf(args);
          case "pdf-metadata":
            return await this.handlePdfMetadata(args);
          default:
            throw new Error(`Unknown tool: ${name}`);
        }
      } catch (error) {
        // Failures are returned as an error result instead of being rethrown.
        const message = error instanceof Error ? error.message : String(error);
        return {
          content: [
            {
              type: "text",
              text: `Error: ${message}`,
            },
          ],
          isError: true,
        };
      }
    });
  }

  /**
   * Read a PDF from disk and parse it with pdf-parse.
   *
   * @param {string} filePath - Path to the PDF; resolved with `path.resolve`.
   * @returns {Promise<object>} The pdf-parse result (this file reads its
   *   `text`, `numpages`, `info` and `version` properties).
   * @throws {Error} If the file cannot be read or parsed.
   */
  async loadPdf(filePath) {
    const resolvedPath = path.resolve(filePath);
    const buffer = await fs.readFile(resolvedPath);
    return await pdfParse(buffer);
  }

  /**
   * Normalize whitespace in extracted text while keeping paragraph breaks.
   *
   * Runs of spaces/tabs collapse to one space, spaces next to a line break
   * are dropped, and three or more consecutive newlines become exactly two.
   *
   * @param {string} text - Raw extracted text.
   * @returns {string} The cleaned text, trimmed at both ends.
   */
  cleanText(text) {
    return text
      .replace(/\r\n?/g, "\n") // Unify line endings first
      // Collapse horizontal whitespace only: matching newlines here would
      // flatten the text to one line and make the newline rule below a no-op.
      .replace(/[^\S\n]+/g, " ")
      .replace(/ ?\n ?/g, "\n") // Drop spaces hugging a line break
      .replace(/\n{3,}/g, "\n\n") // Reduce multiple newlines
      .trim();
  }

  /**
   * Expand a page-range expression into a sorted list of unique page numbers.
   *
   * Accepts comma-separated single pages and `start-end` ranges (for example
   * `"1-5"` or `"1,3,5"`). A missing, empty, or `"all"` range selects every
   * page. Parts that are not numeric or fall outside `1..totalPages` are
   * ignored.
   *
   * @param {string} pageRange - Range expression.
   * @param {number} totalPages - Number of pages in the document.
   * @returns {number[]} Ascending 1-based page numbers.
   */
  parsePageRange(pageRange, totalPages) {
    const spec = pageRange == null ? "all" : String(pageRange).trim();
    if (spec === "" || spec.toLowerCase() === "all") {
      return Array.from({ length: totalPages }, (_, i) => i + 1);
    }

    const pages = new Set();
    for (const part of spec.split(",")) {
      if (part.includes("-")) {
        const [start, end] = part
          .split("-")
          .map((n) => Number.parseInt(n.trim(), 10));
        if (Number.isNaN(start) || Number.isNaN(end)) {
          continue;
        }
        // Clamp to the document bounds so page 0 / negative pages are never
        // produced and oversized ranges do not loop past the last page.
        const first = Math.max(start, 1);
        const last = Math.min(end, totalPages);
        for (let page = first; page <= last; page++) {
          pages.add(page);
        }
      } else {
        const page = Number.parseInt(part.trim(), 10);
        // Comparisons with NaN are false, so non-numeric parts are skipped.
        if (page >= 1 && page <= totalPages) {
          pages.add(page);
        }
      }
    }

    return [...pages].sort((a, b) => a - b);
  }

  /**
   * Tool handler for `read-pdf`: return the document text, optionally cleaned
   * and followed by a metadata summary.
   *
   * @param {object} args - Tool arguments: `file` (required), `pages`,
   *   `clean_text` (default false), `include_metadata` (default true).
   * @returns {Promise<object>} MCP tool result with a single text item.
   * @throws {Error} If `file` is missing or the PDF cannot be loaded.
   */
  async handleReadPdf(args = {}) {
    const { pages, clean_text = false, include_metadata = true } = args;
    const file = requireStringArg(args, "file");
    const pdf = await this.loadPdf(file);

    const text = clean_text ? this.cleanText(pdf.text) : pdf.text;

    // Note: pdf-parse doesn't easily support page-by-page extraction
    // For now, we'll return the full text with a note about page filtering
    let result = `PDF Text Content:\n\n${text}`;

    const requestedSubset =
      typeof pages === "string" &&
      pages.trim() !== "" &&
      pages.trim().toLowerCase() !== "all";
    if (requestedSubset) {
      // Tell the caller explicitly that the range was not applied rather
      // than silently returning more text than was asked for.
      result += `\n\n--- Note ---\nPage filtering is not supported yet: requested pages "${pages}", returned the full text of all ${pdf.numpages} page(s).`;
    }

    if (include_metadata) {
      const metadata = [
        `\n\n--- PDF Metadata ---`,
        `Pages: ${pdf.numpages}`,
        `Title: ${pdf.info?.Title || "N/A"}`,
        `Author: ${pdf.info?.Author || "N/A"}`,
        `Creator: ${pdf.info?.Creator || "N/A"}`,
        `Producer: ${pdf.info?.Producer || "N/A"}`,
      ].join("\n");
      result += metadata;
    }

    return {
      content: [
        {
          type: "text",
          text: result,
        },
      ],
    };
  }

  /**
   * Tool handler for `search-pdf`: scan the extracted text line by line and
   * report each matching line with two lines of context on either side.
   *
   * @param {object} args - Tool arguments: `file` and `query` (required),
   *   `case_sensitive` (default false), `whole_word` (default false).
   * @returns {Promise<object>} MCP tool result with a single text item.
   * @throws {Error} If `file` or `query` is missing/empty or the PDF cannot
   *   be loaded.
   */
  async handleSearchPdf(args = {}) {
    const { case_sensitive = false, whole_word = false } = args;
    const file = requireStringArg(args, "file");
    // An empty query would match every line, so it is rejected up front.
    const query = requireStringArg(args, "query");
    const pdf = await this.loadPdf(file);

    const lines = pdf.text.split("\n");
    const contextLines = 2; // Number of lines to show before/after match

    // Build the matcher once instead of once per line.
    let isMatch;
    if (whole_word) {
      // The query is escaped so characters such as "(" or "+" are matched
      // literally instead of being interpreted as regex syntax. No "g" flag,
      // so repeated .test() calls carry no lastIndex state.
      const wordPattern = new RegExp(
        `\\b${escapeRegExp(query)}\\b`,
        case_sensitive ? "" : "i"
      );
      isMatch = (line) => wordPattern.test(line);
    } else if (case_sensitive) {
      isMatch = (line) => line.includes(query);
    } else {
      const needle = query.toLowerCase();
      isMatch = (line) => line.toLowerCase().includes(needle);
    }

    const matches = [];
    for (let i = 0; i < lines.length; i++) {
      if (!isMatch(lines[i])) {
        continue;
      }
      const start = Math.max(0, i - contextLines);
      const end = Math.min(lines.length, i + contextLines + 1);
      matches.push({
        line: i + 1,
        context: lines.slice(start, end).join("\n"),
      });
    }

    let result = `Found ${matches.length} match(es) for "${query}" in ${file}\n\n`;

    if (matches.length > 0) {
      result += matches
        .map(
          (match, idx) =>
            `Match ${idx + 1} (around line ${match.line}):\n${match.context}\n`
        )
        .join("\n---\n\n");
    } else {
      result += "No matches found.";
    }

    return {
      content: [
        {
          type: "text",
          text: result,
        },
      ],
    };
  }

  /**
   * Tool handler for `pdf-metadata`: format the page count, PDF version and
   * document information fields as a text report.
   *
   * @param {object} args - Tool arguments: `file` (required).
   * @returns {Promise<object>} MCP tool result with a single text item.
   * @throws {Error} If `file` is missing or the PDF cannot be loaded.
   */
  async handlePdfMetadata(args = {}) {
    const file = requireStringArg(args, "file");
    const pdf = await this.loadPdf(file);

    const info = pdf.info || {};
    const version = pdf.version || "N/A";

    const result = [
      `PDF Metadata for: ${file}`,
      ``,
      `Pages: ${pdf.numpages}`,
      `Version: ${version}`,
      ``,
      `Document Information:`,
      `  Title: ${info.Title || "N/A"}`,
      `  Author: ${info.Author || "N/A"}`,
      `  Subject: ${info.Subject || "N/A"}`,
      `  Keywords: ${info.Keywords || "N/A"}`,
      `  Creator: ${info.Creator || "N/A"}`,
      `  Producer: ${info.Producer || "N/A"}`,
      `  Creation Date: ${info.CreationDate || "N/A"}`,
      `  Modification Date: ${info.ModDate || "N/A"}`,
    ].join("\n");

    return {
      content: [
        {
          type: "text",
          text: result,
        },
      ],
    };
  }

  /**
   * Connect the server to a stdio transport and announce readiness on stderr.
   *
   * @returns {Promise<void>}
   */
  async start() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error(`PDF Reader MCP Server v${VERSION} running on stdio`);
  }
}

// Start the server
const server = new PDFReaderServer();
server.start().catch((error) => {
  console.error(error);
  // Exit non-zero so a failed startup is visible to the launching process.
  process.exit(1);
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hancengiz/read_pdf_as_text_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

index.js•9.52 KiB