MCP PDF Reader

index.ts•8.51 KiB

#!/usr/bin/env node
/**
 * MCP PDF Reader — stdio server entry point.
 *
 * Declares the PDF tools exposed over the Model Context Protocol and routes
 * each `tools/call` request to the matching helper imported from
 * `./pdf-tools.js`. Every tool answers with a single text content item;
 * failures are reported as a text item with `isError: true` rather than
 * being thrown to the transport.
 */
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  Tool,
} from '@modelcontextprotocol/sdk/types.js';
import {
  extractTextFromPDF,
  extractMetadata,
  extractTextFromPages,
  searchInPDF,
  getPageCount,
  listPDFImages,
  extractPDFImage,
} from './pdf-tools.js';
import type { PageRange } from './types.js';

/** Tool definitions advertised to clients in response to `tools/list`. */
const TOOLS: Tool[] = [
  {
    name: 'read_pdf',
    description: 'Extract all text content from a PDF file',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
        includeMetadata: {
          type: 'boolean',
          description: 'Whether to include PDF metadata in the response',
          default: false,
        },
      },
      required: ['filePath'],
    },
  },
  {
    name: 'get_pdf_metadata',
    description:
      'Extract metadata information from a PDF file (title, author, creation date, etc.)',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
      },
      required: ['filePath'],
    },
  },
  {
    name: 'read_pdf_pages',
    description: 'Extract text from specific pages or page range in a PDF',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
        startPage: {
          type: 'number',
          description: 'Starting page number (1-indexed)',
        },
        endPage: {
          type: 'number',
          description: 'Ending page number (optional, defaults to startPage)',
        },
      },
      required: ['filePath', 'startPage'],
    },
  },
  {
    name: 'search_pdf',
    description: 'Search for text in a PDF and return all matches with context',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
        searchTerm: {
          type: 'string',
          description: 'Text to search for in the PDF',
        },
        caseSensitive: {
          type: 'boolean',
          description: 'Whether the search should be case sensitive',
          default: false,
        },
      },
      required: ['filePath', 'searchTerm'],
    },
  },
  {
    name: 'get_pdf_page_count',
    description: 'Get the total number of pages in a PDF file',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
      },
      required: ['filePath'],
    },
  },
  {
    name: 'list_pdf_images',
    description:
      'List all images embedded in a PDF file with their metadata (page, dimensions, type)',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
      },
      required: ['filePath'],
    },
  },
  {
    name: 'extract_pdf_image',
    description:
      'Extract a specific image from a PDF by its index (use list_pdf_images to get available indices)',
    inputSchema: {
      type: 'object',
      properties: {
        filePath: {
          type: 'string',
          description: 'Absolute path to the PDF file',
        },
        imageIndex: {
          type: 'number',
          description: 'Index of the image to extract (0-based, from list_pdf_images)',
        },
      },
      required: ['filePath', 'imageIndex'],
    },
  },
];

/** Raw, unvalidated arguments object received with a tool call. */
type ToolArguments = Record<string, unknown>;

/** Names the runtime type of `value` for use in validation error messages. */
function describeType(value: unknown): string {
  if (value === null) return 'null';
  return Array.isArray(value) ? 'array' : typeof value;
}

/**
 * Reads a required string argument.
 *
 * @throws Error when the argument is absent or not a string.
 */
function requireString(args: ToolArguments, key: string): string {
  const value = args[key];
  if (typeof value !== 'string') {
    throw new Error(
      `Invalid arguments: "${key}" must be a string (received ${describeType(value)})`
    );
  }
  return value;
}

/**
 * Reads a required numeric argument.
 *
 * @throws Error when the argument is absent or not a finite number.
 */
function requireNumber(args: ToolArguments, key: string): number {
  const value = args[key];
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    throw new Error(
      `Invalid arguments: "${key}" must be a number (received ${describeType(value)})`
    );
  }
  return value;
}

/**
 * Reads an optional numeric argument; `null` and `undefined` both count as
 * "not provided" and yield `undefined`.
 *
 * @throws Error when the argument is present but not a finite number.
 */
function optionalNumber(args: ToolArguments, key: string): number | undefined {
  return args[key] == null ? undefined : requireNumber(args, key);
}

/**
 * Reads an optional boolean argument; `null` and `undefined` both count as
 * "not provided" and yield `undefined`.
 *
 * @throws Error when the argument is present but not a boolean.
 */
function optionalBoolean(args: ToolArguments, key: string): boolean | undefined {
  const value = args[key];
  if (value == null) return undefined;
  if (typeof value !== 'boolean') {
    throw new Error(
      `Invalid arguments: "${key}" must be a boolean (received ${describeType(value)})`
    );
  }
  return value;
}

/** Wraps `text` in the single-item text content shape every tool returns. */
function textResult(text: string) {
  return { content: [{ type: 'text' as const, text }] };
}

// Create server instance
const server = new Server(
  {
    name: 'mcp-pdf-reader',
    version: '1.0.0',
  },
  {
    capabilities: {
      tools: {},
    },
  }
);

// Handle tool listing
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return {
    tools: TOOLS,
  };
});

// Handle tool execution
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name } = request.params;
  // `arguments` is optional in a tool call; fall back to an empty object so a
  // missing required field is reported by the validators below instead of
  // failing with an opaque destructuring TypeError.
  const args: ToolArguments = request.params.arguments ?? {};

  try {
    switch (name) {
      case 'read_pdf': {
        const filePath = requireString(args, 'filePath');
        const includeMetadata = optionalBoolean(args, 'includeMetadata');
        const result = await extractTextFromPDF(filePath);
        // With includeMetadata the whole extraction result is serialized;
        // otherwise only its `text` field is returned.
        return textResult(includeMetadata ? JSON.stringify(result, null, 2) : result.text);
      }

      case 'get_pdf_metadata': {
        const filePath = requireString(args, 'filePath');
        const metadata = await extractMetadata(filePath);
        return textResult(JSON.stringify(metadata, null, 2));
      }

      case 'read_pdf_pages': {
        const filePath = requireString(args, 'filePath');
        const pageRange: PageRange = {
          start: requireNumber(args, 'startPage'),
          end: optionalNumber(args, 'endPage'),
        };
        const text = await extractTextFromPages(filePath, pageRange);
        return textResult(text);
      }

      case 'search_pdf': {
        const filePath = requireString(args, 'filePath');
        const searchTerm = requireString(args, 'searchTerm');
        const caseSensitive = optionalBoolean(args, 'caseSensitive');
        const results = await searchInPDF(filePath, searchTerm, caseSensitive);
        return textResult(JSON.stringify(results, null, 2));
      }

      case 'get_pdf_page_count': {
        const filePath = requireString(args, 'filePath');
        const pageCount = await getPageCount(filePath);
        return textResult(`Total pages: ${pageCount}`);
      }

      case 'list_pdf_images': {
        const filePath = requireString(args, 'filePath');
        const images = await listPDFImages(filePath);
        // Report a summary per image (no pixel data) plus the total count.
        return textResult(
          JSON.stringify(
            {
              totalImages: images.length,
              images: images.map((img) => ({
                index: img.index,
                page: img.page,
                name: img.name,
                dimensions: `${img.width}x${img.height}`,
                type: img.type,
              })),
            },
            null,
            2
          )
        );
      }

      case 'extract_pdf_image': {
        const filePath = requireString(args, 'filePath');
        const imageIndex = requireNumber(args, 'imageIndex');
        const image = await extractPDFImage(filePath, imageIndex);
        return textResult(
          JSON.stringify(
            {
              index: image.index,
              page: image.page,
              name: image.name,
              width: image.width,
              height: image.height,
              type: image.type,
              dataSize: image.data.length,
              data: image.data, // Base64 encoded image
              note: 'Image data is Base64 encoded. Decode to save as file.',
            },
            null,
            2
          )
        );
      }

      default:
        throw new Error(`Unknown tool: ${name}`);
    }
  } catch (error: unknown) {
    // Validation failures, unknown tool names and helper errors all end up
    // here and are returned to the client as an error result.
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      ...textResult(`Error: ${errorMessage}`),
      isError: true,
    };
  }
});

/**
 * Connects the server to a stdio transport.
 *
 * The startup notice goes through `console.error` (stderr), keeping stdout
 * free for the transport.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error('MCP PDF Reader server running on stdio');
}

main().catch((error) => {
  console.error('Server error:', error);
  process.exit(1);
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/rturv/mcp-pdf-reader'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

index.ts•8.51 KiB