PDF MCP Server

Overview Schema Related Servers Score Discussions

pdf-mcp-server
pdf_mcp

server.py•12.2 KiB

# MCP server entry point for the PDF tools.
#
# Every tool below is a thin adapter: it forwards its arguments to the function
# of the same name in ``pdf_tools`` and lets ``_handle_errors`` turn failures
# into plain dictionaries instead of raised exceptions.
#
# NOTE: tool docstrings are kept short and stable because FastMCP publishes a
# tool's docstring as its description to clients.

from __future__ import annotations

import functools
import traceback
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence

from mcp.server.fastmcp import FastMCP

from . import pdf_tools
from .pdf_tools import PdfToolError

mcp = FastMCP("PDF Handler")


def _wrap_result(result: Any) -> Any:
    """Return ``result`` unchanged, except that a ``Path`` becomes its ``str`` form."""
    return str(result) if isinstance(result, Path) else result


def _handle_errors(fn):
    """Decorate ``fn`` so that failures are returned as dictionaries.

    The decorated callable returns:
    - the normalised result of ``fn`` (see ``_wrap_result``) on success;
    - ``{"error": <message>}`` when ``fn`` raises ``PdfToolError``;
    - ``{"error": "Unexpected error: <message>", "trace": <traceback text>}``
      for any other ``Exception``.
    """

    @functools.wraps(fn)
    def guarded(*args, **kwargs):
        try:
            outcome = fn(*args, **kwargs)
            return _wrap_result(outcome)
        except PdfToolError as err:
            # Expected, domain-level failure: report only the message.
            return {"error": str(err)}
        except Exception as err:  # pragma: no cover - defensive
            # Anything else is unexpected; include the traceback text too.
            return {
                "error": f"Unexpected error: {err}",
                "trace": traceback.format_exc(),
            }

    return guarded


# =============================================================================
# Form Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def get_pdf_form_fields(pdf_path: str) -> Dict[str, Any]:
    """Return available form fields in the PDF."""
    return pdf_tools.get_pdf_form_fields(pdf_path)


@mcp.tool()
@_handle_errors
def fill_pdf_form(
    input_path: str,
    output_path: str,
    data: Dict[str, str],
    flatten: bool = False,
) -> Dict[str, Any]:
    """Fill a PDF form with provided data. Optionally flatten to make non-editable."""
    return pdf_tools.fill_pdf_form(input_path, output_path, data, flatten)


@mcp.tool()
@_handle_errors
def flatten_pdf(input_path: str, output_path: str) -> Dict[str, Any]:
    """Flatten a PDF (remove form fields/annotations)."""
    return pdf_tools.flatten_pdf(input_path, output_path)


@mcp.tool()
@_handle_errors
def clear_pdf_form_fields(
    input_path: str,
    output_path: str,
    fields: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Clear (delete) values for PDF form fields while keeping fields fillable."""
    return pdf_tools.clear_pdf_form_fields(input_path, output_path, fields=fields)


# =============================================================================
# Encryption and Page Operations
# =============================================================================


@mcp.tool()
@_handle_errors
def encrypt_pdf(
    input_path: str,
    output_path: str,
    user_password: str,
    owner_password: Optional[str] = None,
    allow_printing: bool = True,
    allow_modifying: bool = False,
    allow_copying: bool = False,
    allow_annotations: bool = False,
    allow_form_filling: bool = True,
    use_128bit: bool = True,
) -> Dict[str, Any]:
    """Encrypt (password-protect) a PDF using pypdf."""
    return pdf_tools.encrypt_pdf(
        input_path=input_path,
        output_path=output_path,
        user_password=user_password,
        owner_password=owner_password,
        allow_printing=allow_printing,
        allow_modifying=allow_modifying,
        allow_copying=allow_copying,
        allow_annotations=allow_annotations,
        allow_form_filling=allow_form_filling,
        use_128bit=use_128bit,
    )


@mcp.tool()
@_handle_errors
def merge_pdfs(pdf_list: List[str], output_path: str) -> Dict[str, Any]:
    """Merge multiple PDFs into a single file."""
    return pdf_tools.merge_pdfs(pdf_list, output_path)


@mcp.tool()
@_handle_errors
def extract_pages(input_path: str, pages: List[int], output_path: str) -> Dict[str, Any]:
    """Extract specific 1-based pages into a new PDF."""
    return pdf_tools.extract_pages(input_path, pages, output_path)


@mcp.tool()
@_handle_errors
def rotate_pages(
    input_path: str,
    pages: List[int],
    degrees: int,
    output_path: str,
) -> Dict[str, Any]:
    """Rotate specified 1-based pages by degrees (must be multiple of 90)."""
    return pdf_tools.rotate_pages(input_path, pages, degrees, output_path)


# =============================================================================
# Text Annotation Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def add_text_annotation(
    input_path: str,
    page: int,
    text: str,
    output_path: str,
    rect: Optional[Sequence[float]] = None,
    annotation_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Add a FreeText annotation to a page (managed text insertion)."""
    return pdf_tools.add_text_annotation(
        input_path,
        page,
        text,
        output_path,
        rect=rect,
        annotation_id=annotation_id,
    )


@mcp.tool()
@_handle_errors
def update_text_annotation(
    input_path: str,
    output_path: str,
    annotation_id: str,
    text: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Update an existing annotation by annotation_id."""
    return pdf_tools.update_text_annotation(
        input_path,
        output_path,
        annotation_id,
        text,
        pages=pages,
    )


@mcp.tool()
@_handle_errors
def remove_text_annotation(
    input_path: str,
    output_path: str,
    annotation_id: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Remove an existing annotation by annotation_id."""
    return pdf_tools.remove_text_annotation(
        input_path,
        output_path,
        annotation_id,
        pages=pages,
    )


@mcp.tool()
@_handle_errors
def remove_annotations(
    input_path: str,
    output_path: str,
    pages: List[int],
    subtype: Optional[str] = None,
) -> Dict[str, Any]:
    """Remove annotations from given pages. Optionally filter by subtype (e.g., FreeText)."""
    return pdf_tools.remove_annotations(input_path, output_path, pages, subtype=subtype)


# =============================================================================
# Page Insertion and Removal
# =============================================================================


@mcp.tool()
@_handle_errors
def insert_pages(
    input_path: str,
    insert_from_path: str,
    at_page: int,
    output_path: str,
) -> Dict[str, Any]:
    """Insert pages from another PDF before at_page (1-based)."""
    return pdf_tools.insert_pages(input_path, insert_from_path, at_page, output_path)


@mcp.tool()
@_handle_errors
def remove_pages(input_path: str, pages: List[int], output_path: str) -> Dict[str, Any]:
    """Remove specified 1-based pages from a PDF."""
    return pdf_tools.remove_pages(input_path, pages, output_path)


# =============================================================================
# Managed Text Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def insert_text(
    input_path: str,
    page: int,
    text: str,
    output_path: str,
    rect: Optional[Sequence[float]] = None,
    text_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Insert text via a managed FreeText annotation."""
    return pdf_tools.insert_text(
        input_path,
        page,
        text,
        output_path,
        rect=rect,
        text_id=text_id,
    )


@mcp.tool()
@_handle_errors
def edit_text(
    input_path: str,
    output_path: str,
    text_id: str,
    text: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Edit managed inserted text."""
    return pdf_tools.edit_text(input_path, output_path, text_id, text, pages=pages)


@mcp.tool()
@_handle_errors
def remove_text(
    input_path: str,
    output_path: str,
    text_id: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Remove managed inserted text."""
    return pdf_tools.remove_text(input_path, output_path, text_id, pages=pages)


# =============================================================================
# Metadata and Watermark Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def get_pdf_metadata(pdf_path: str) -> Dict[str, Any]:
    """Get basic PDF document metadata."""
    return pdf_tools.get_pdf_metadata(pdf_path)


@mcp.tool()
@_handle_errors
def set_pdf_metadata(
    input_path: str,
    output_path: str,
    title: Optional[str] = None,
    author: Optional[str] = None,
    subject: Optional[str] = None,
    keywords: Optional[str] = None,
) -> Dict[str, Any]:
    """Set basic PDF document metadata (title, author, subject, keywords)."""
    return pdf_tools.set_pdf_metadata(
        input_path,
        output_path,
        title=title,
        author=author,
        subject=subject,
        keywords=keywords,
    )


@mcp.tool()
@_handle_errors
def add_text_watermark(
    input_path: str,
    output_path: str,
    text: str,
    pages: Optional[List[int]] = None,
    rect: Optional[Sequence[float]] = None,
    annotation_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Add a simple text watermark or stamp via FreeText annotations."""
    return pdf_tools.add_text_watermark(
        input_path,
        output_path,
        text,
        pages=pages,
        rect=rect,
        annotation_id=annotation_id,
    )


# =============================================================================
# Comment Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def add_comment(
    input_path: str,
    output_path: str,
    page: int,
    text: str,
    pos: Sequence[float],
    comment_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Add a PDF comment (sticky note) using PyMuPDF."""
    return pdf_tools.add_comment(
        input_path=input_path,
        output_path=output_path,
        page=page,
        text=text,
        pos=pos,
        comment_id=comment_id,
    )


@mcp.tool()
@_handle_errors
def update_comment(
    input_path: str,
    output_path: str,
    comment_id: str,
    text: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Update a PDF comment by id using PyMuPDF."""
    return pdf_tools.update_comment(
        input_path=input_path,
        output_path=output_path,
        comment_id=comment_id,
        text=text,
        pages=pages,
    )


@mcp.tool()
@_handle_errors
def remove_comment(
    input_path: str,
    output_path: str,
    comment_id: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Remove a PDF comment by id using PyMuPDF."""
    return pdf_tools.remove_comment(
        input_path=input_path,
        output_path=output_path,
        comment_id=comment_id,
        pages=pages,
    )


# =============================================================================
# Signature Image Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def add_signature_image(
    input_path: str,
    output_path: str,
    page: int,
    image_path: str,
    rect: Sequence[float],
) -> Dict[str, Any]:
    """Add a signature image by inserting it on a page (PyMuPDF)."""
    return pdf_tools.add_signature_image(
        input_path=input_path,
        output_path=output_path,
        page=page,
        image_path=image_path,
        rect=rect,
    )


@mcp.tool()
@_handle_errors
def update_signature_image(
    input_path: str,
    output_path: str,
    page: int,
    signature_xref: int,
    image_path: Optional[str] = None,
    rect: Optional[Sequence[float]] = None,
) -> Dict[str, Any]:
    """Update or resize a signature image (PyMuPDF)."""
    return pdf_tools.update_signature_image(
        input_path=input_path,
        output_path=output_path,
        page=page,
        signature_xref=signature_xref,
        image_path=image_path,
        rect=rect,
    )


@mcp.tool()
@_handle_errors
def remove_signature_image(
    input_path: str,
    output_path: str,
    page: int,
    signature_xref: int,
) -> Dict[str, Any]:
    """Remove a signature image by xref (PyMuPDF)."""
    return pdf_tools.remove_signature_image(
        input_path=input_path,
        output_path=output_path,
        page=page,
        signature_xref=signature_xref,
    )


# =============================================================================
# OCR and Text Extraction Tools
# =============================================================================


@mcp.tool()
@_handle_errors
def detect_pdf_type(pdf_path: str) -> Dict[str, Any]:
    """
    Analyze a PDF to classify its content type.

    Returns:
    - classification: "searchable", "image_based", or "hybrid"
    - needs_ocr: Whether OCR is recommended for full text extraction
    - Detailed page-by-page analysis with text/image metrics
    """
    return pdf_tools.detect_pdf_type(pdf_path)


@mcp.tool()
@_handle_errors
def extract_text_native(
    pdf_path: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """
    Extract text from PDF using native text layer only (no OCR).

    Fast extraction for PDFs with embedded text. Use detect_pdf_type first
    to determine if the PDF has sufficient native text.
    """
    return pdf_tools.extract_text_native(pdf_path, pages=pages)


@mcp.tool()
@_handle_errors
def extract_text_ocr(
    pdf_path: str,
    pages: Optional[List[int]] = None,
    engine: str = "auto",
    dpi: int = 300,
    language: str = "eng",
) -> Dict[str, Any]:
    """
    Extract text from PDF with OCR support.

    Engine options:
    - "auto": Try native extraction first; fall back to OCR if insufficient
    - "native": Use only native text extraction (no OCR)
    - "tesseract": Force OCR using Tesseract
    - "force_ocr": Always use OCR even if native text exists

    Requires tesseract-ocr to be installed for OCR functionality.
    """
    return pdf_tools.extract_text_ocr(
        pdf_path,
        pages=pages,
        engine=engine,
        dpi=dpi,
        language=language,
    )


@mcp.tool()
@_handle_errors
def get_pdf_text_blocks(
    pdf_path: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """
    Extract text blocks with position information from PDF.

    Returns structured text blocks with bounding boxes, useful for
    understanding document layout and identifying form field locations.
    """
    return pdf_tools.get_pdf_text_blocks(pdf_path, pages=pages)


if __name__ == "__main__":
    # Serve over standard input/output when run as a script.
    mcp.run(transport="stdio")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nfsarch33/pdf-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

server.py•12.2 KiB