from __future__ import annotations
import functools
import traceback
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from mcp.server.fastmcp import FastMCP
from . import pdf_tools
from .pdf_tools import PdfToolError
# Shared FastMCP server instance named "PDF Handler"; the tool functions in
# this module register themselves on it through the ``@mcp.tool()`` decorator.
mcp = FastMCP("PDF Handler")
def _wrap_result(result: Any) -> Any:
    """Convert ``Path`` objects in a tool result into plain strings.

    A bare ``Path`` becomes ``str(path)``. Dicts, lists and plain tuples are
    rebuilt with the same conversion applied recursively to their values /
    items, so a ``Path`` nested inside a result dict is converted as well.
    Any other value is returned unchanged.

    Args:
        result: The raw value returned by a tool implementation.

    Returns:
        ``result`` with every reachable ``Path`` replaced by its string form.
        Containers are returned as new objects; dict keys are left as-is.
    """
    if isinstance(result, Path):
        return str(result)
    if isinstance(result, dict):
        return {key: _wrap_result(value) for key, value in result.items()}
    if isinstance(result, list):
        return [_wrap_result(item) for item in result]
    # Exact type check on purpose: tuple subclasses such as named tuples have
    # their own constructors and are passed through untouched.
    if type(result) is tuple:
        return tuple(_wrap_result(item) for item in result)
    return result
def _handle_errors(fn):
    """Decorate ``fn`` so that failures are returned as dicts, not raised.

    The wrapper calls ``fn`` with whatever arguments it receives and passes
    the outcome through ``_wrap_result``.

    Args:
        fn: The callable to protect.

    Returns:
        A wrapper carrying ``fn``'s metadata (via ``functools.wraps``) that
        returns one of:
        - the wrapped result of ``fn`` on success;
        - ``{"error": <message>}`` when a ``PdfToolError`` is raised;
        - ``{"error": "Unexpected error: ...", "trace": <traceback text>}``
          for any other ``Exception``.
    """

    @functools.wraps(fn)
    def wrapper(*call_args, **call_kwargs):
        try:
            # Both the call and the result conversion stay inside the ``try``
            # so an error in either one is reported the same way.
            outcome = fn(*call_args, **call_kwargs)
            return _wrap_result(outcome)
        except PdfToolError as known_err:
            return {"error": str(known_err)}
        except Exception as unknown_err:  # pragma: no cover - defensive
            failure = {
                "error": f"Unexpected error: {unknown_err}",
                "trace": traceback.format_exc(),
            }
            return failure

    return wrapper
@mcp.tool()
@_handle_errors
def get_pdf_form_fields(pdf_path: str) -> Dict[str, Any]:
    """Return available form fields in the PDF.

    Returns:
        The value produced by ``pdf_tools.get_pdf_form_fields``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # Thin MCP wrapper: the argument is forwarded unchanged.
    result = pdf_tools.get_pdf_form_fields(pdf_path)
    return result
@mcp.tool()
@_handle_errors
def fill_pdf_form(
    input_path: str,
    output_path: str,
    data: Dict[str, str],
    flatten: bool = False,
) -> Dict[str, Any]:
    """Fill a PDF form with provided data. Optionally flatten to make non-editable.

    Returns:
        The value produced by ``pdf_tools.fill_pdf_form``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # All four arguments are forwarded positionally, in signature order.
    result = pdf_tools.fill_pdf_form(input_path, output_path, data, flatten)
    return result
@mcp.tool()
@_handle_errors
def flatten_pdf(input_path: str, output_path: str) -> Dict[str, Any]:
    """Flatten a PDF (remove form fields/annotations).

    Returns:
        The value produced by ``pdf_tools.flatten_pdf``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # Thin MCP wrapper: both paths are forwarded unchanged.
    result = pdf_tools.flatten_pdf(input_path, output_path)
    return result
@mcp.tool()
@_handle_errors
def clear_pdf_form_fields(
    input_path: str,
    output_path: str,
    fields: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Clear (delete) values for PDF form fields while keeping fields fillable.

    Returns:
        The value produced by ``pdf_tools.clear_pdf_form_fields``. If that
        call raises an ``Exception``, a dict with an ``"error"`` key is
        returned instead (via ``_handle_errors``).
    """
    # ``fields`` is passed by keyword; ``None`` (the default) is forwarded as-is.
    result = pdf_tools.clear_pdf_form_fields(
        input_path,
        output_path,
        fields=fields,
    )
    return result
@mcp.tool()
@_handle_errors
def encrypt_pdf(
    input_path: str,
    output_path: str,
    user_password: str,
    owner_password: Optional[str] = None,
    allow_printing: bool = True,
    allow_modifying: bool = False,
    allow_copying: bool = False,
    allow_annotations: bool = False,
    allow_form_filling: bool = True,
    use_128bit: bool = True,
) -> Dict[str, Any]:
    """Encrypt (password-protect) a PDF using pypdf.

    Returns:
        The value produced by ``pdf_tools.encrypt_pdf``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword under its own name.
    options = {
        "input_path": input_path,
        "output_path": output_path,
        "user_password": user_password,
        "owner_password": owner_password,
        "allow_printing": allow_printing,
        "allow_modifying": allow_modifying,
        "allow_copying": allow_copying,
        "allow_annotations": allow_annotations,
        "allow_form_filling": allow_form_filling,
        "use_128bit": use_128bit,
    }
    return pdf_tools.encrypt_pdf(**options)
@mcp.tool()
@_handle_errors
def merge_pdfs(pdf_list: List[str], output_path: str) -> Dict[str, Any]:
    """Merge multiple PDFs into a single file.

    Returns:
        The value produced by ``pdf_tools.merge_pdfs``. If that call raises an
        ``Exception``, a dict with an ``"error"`` key is returned instead (via
        ``_handle_errors``).
    """
    # Thin MCP wrapper: arguments are forwarded positionally.
    result = pdf_tools.merge_pdfs(pdf_list, output_path)
    return result
@mcp.tool()
@_handle_errors
def extract_pages(
    input_path: str,
    pages: List[int],
    output_path: str,
) -> Dict[str, Any]:
    """Extract specific 1-based pages into a new PDF.

    Returns:
        The value produced by ``pdf_tools.extract_pages``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # Thin MCP wrapper: arguments are forwarded positionally.
    result = pdf_tools.extract_pages(input_path, pages, output_path)
    return result
@mcp.tool()
@_handle_errors
def rotate_pages(
    input_path: str,
    pages: List[int],
    degrees: int,
    output_path: str,
) -> Dict[str, Any]:
    """Rotate specified 1-based pages by degrees (must be multiple of 90).

    Returns:
        The value produced by ``pdf_tools.rotate_pages``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # No validation happens here; the arguments go straight to ``pdf_tools``.
    result = pdf_tools.rotate_pages(input_path, pages, degrees, output_path)
    return result
@mcp.tool()
@_handle_errors
def add_text_annotation(
    input_path: str,
    page: int,
    text: str,
    output_path: str,
    rect: Optional[Sequence[float]] = None,
    annotation_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Add a FreeText annotation to a page (managed text insertion).

    Returns:
        The value produced by ``pdf_tools.add_text_annotation``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # The optional ``rect`` and ``annotation_id`` are forwarded by keyword.
    result = pdf_tools.add_text_annotation(
        input_path,
        page,
        text,
        output_path,
        rect=rect,
        annotation_id=annotation_id,
    )
    return result
@mcp.tool()
@_handle_errors
def update_text_annotation(
    input_path: str,
    output_path: str,
    annotation_id: str,
    text: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Update an existing annotation by annotation_id.

    Returns:
        The value produced by ``pdf_tools.update_text_annotation``. If that
        call raises an ``Exception``, a dict with an ``"error"`` key is
        returned instead (via ``_handle_errors``).
    """
    # ``pages`` is forwarded by keyword; the rest positionally.
    result = pdf_tools.update_text_annotation(
        input_path,
        output_path,
        annotation_id,
        text,
        pages=pages,
    )
    return result
@mcp.tool()
@_handle_errors
def remove_text_annotation(
    input_path: str,
    output_path: str,
    annotation_id: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Remove an existing annotation by annotation_id.

    Returns:
        The value produced by ``pdf_tools.remove_text_annotation``. If that
        call raises an ``Exception``, a dict with an ``"error"`` key is
        returned instead (via ``_handle_errors``).
    """
    # ``pages`` is forwarded by keyword; the rest positionally.
    result = pdf_tools.remove_text_annotation(
        input_path,
        output_path,
        annotation_id,
        pages=pages,
    )
    return result
@mcp.tool()
@_handle_errors
def remove_annotations(
    input_path: str,
    output_path: str,
    pages: List[int],
    subtype: Optional[str] = None,
) -> Dict[str, Any]:
    """Remove annotations from given pages. Optionally filter by subtype (e.g., FreeText).

    Returns:
        The value produced by ``pdf_tools.remove_annotations``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # ``subtype`` is forwarded by keyword; ``None`` (the default) is passed as-is.
    result = pdf_tools.remove_annotations(
        input_path,
        output_path,
        pages,
        subtype=subtype,
    )
    return result
@mcp.tool()
@_handle_errors
def insert_pages(
    input_path: str,
    insert_from_path: str,
    at_page: int,
    output_path: str,
) -> Dict[str, Any]:
    """Insert pages from another PDF before at_page (1-based).

    Returns:
        The value produced by ``pdf_tools.insert_pages``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # Thin MCP wrapper: arguments are forwarded positionally.
    result = pdf_tools.insert_pages(
        input_path,
        insert_from_path,
        at_page,
        output_path,
    )
    return result
@mcp.tool()
@_handle_errors
def remove_pages(
    input_path: str,
    pages: List[int],
    output_path: str,
) -> Dict[str, Any]:
    """Remove specified 1-based pages from a PDF.

    Returns:
        The value produced by ``pdf_tools.remove_pages``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # Thin MCP wrapper: arguments are forwarded positionally.
    result = pdf_tools.remove_pages(input_path, pages, output_path)
    return result
@mcp.tool()
@_handle_errors
def insert_text(
    input_path: str,
    page: int,
    text: str,
    output_path: str,
    rect: Optional[Sequence[float]] = None,
    text_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Insert text via a managed FreeText annotation.

    Returns:
        The value produced by ``pdf_tools.insert_text``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # The optional ``rect`` and ``text_id`` are forwarded by keyword.
    result = pdf_tools.insert_text(
        input_path,
        page,
        text,
        output_path,
        rect=rect,
        text_id=text_id,
    )
    return result
@mcp.tool()
@_handle_errors
def edit_text(
    input_path: str,
    output_path: str,
    text_id: str,
    text: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Edit managed inserted text.

    Returns:
        The value produced by ``pdf_tools.edit_text``. If that call raises an
        ``Exception``, a dict with an ``"error"`` key is returned instead (via
        ``_handle_errors``).
    """
    # ``pages`` is forwarded by keyword; the rest positionally.
    result = pdf_tools.edit_text(
        input_path,
        output_path,
        text_id,
        text,
        pages=pages,
    )
    return result
@mcp.tool()
@_handle_errors
def remove_text(
    input_path: str,
    output_path: str,
    text_id: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Remove managed inserted text.

    Returns:
        The value produced by ``pdf_tools.remove_text``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # ``pages`` is forwarded by keyword; the rest positionally.
    result = pdf_tools.remove_text(
        input_path,
        output_path,
        text_id,
        pages=pages,
    )
    return result
@mcp.tool()
@_handle_errors
def get_pdf_metadata(pdf_path: str) -> Dict[str, Any]:
    """Get basic PDF document metadata.

    Returns:
        The value produced by ``pdf_tools.get_pdf_metadata``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # Thin MCP wrapper: the argument is forwarded unchanged.
    result = pdf_tools.get_pdf_metadata(pdf_path)
    return result
@mcp.tool()
@_handle_errors
def set_pdf_metadata(
    input_path: str,
    output_path: str,
    title: Optional[str] = None,
    author: Optional[str] = None,
    subject: Optional[str] = None,
    keywords: Optional[str] = None,
) -> Dict[str, Any]:
    """Set basic PDF document metadata (title, author, subject, keywords).

    Returns:
        The value produced by ``pdf_tools.set_pdf_metadata``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # The four metadata values are forwarded by keyword, including any that
    # are still ``None``; the two paths are forwarded positionally.
    metadata = {
        "title": title,
        "author": author,
        "subject": subject,
        "keywords": keywords,
    }
    return pdf_tools.set_pdf_metadata(input_path, output_path, **metadata)
@mcp.tool()
@_handle_errors
def add_text_watermark(
    input_path: str,
    output_path: str,
    text: str,
    pages: Optional[List[int]] = None,
    rect: Optional[Sequence[float]] = None,
    annotation_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Add a simple text watermark or stamp via FreeText annotations.

    Returns:
        The value produced by ``pdf_tools.add_text_watermark``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # The three optional parameters are forwarded by keyword.
    result = pdf_tools.add_text_watermark(
        input_path,
        output_path,
        text,
        pages=pages,
        rect=rect,
        annotation_id=annotation_id,
    )
    return result
@mcp.tool()
@_handle_errors
def add_comment(
    input_path: str,
    output_path: str,
    page: int,
    text: str,
    pos: Sequence[float],
    comment_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Add a PDF comment (sticky note) using PyMuPDF.

    Returns:
        The value produced by ``pdf_tools.add_comment``. If that call raises
        an ``Exception``, a dict with an ``"error"`` key is returned instead
        (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword under its own name.
    result = pdf_tools.add_comment(
        input_path=input_path,
        output_path=output_path,
        page=page,
        text=text,
        pos=pos,
        comment_id=comment_id,
    )
    return result
@mcp.tool()
@_handle_errors
def update_comment(
    input_path: str,
    output_path: str,
    comment_id: str,
    text: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Update a PDF comment by id using PyMuPDF.

    Returns:
        The value produced by ``pdf_tools.update_comment``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword under its own name.
    result = pdf_tools.update_comment(
        input_path=input_path,
        output_path=output_path,
        comment_id=comment_id,
        text=text,
        pages=pages,
    )
    return result
@mcp.tool()
@_handle_errors
def remove_comment(
    input_path: str,
    output_path: str,
    comment_id: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Remove a PDF comment by id using PyMuPDF.

    Returns:
        The value produced by ``pdf_tools.remove_comment``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword under its own name.
    result = pdf_tools.remove_comment(
        input_path=input_path,
        output_path=output_path,
        comment_id=comment_id,
        pages=pages,
    )
    return result
@mcp.tool()
@_handle_errors
def add_signature_image(
    input_path: str,
    output_path: str,
    page: int,
    image_path: str,
    rect: Sequence[float],
) -> Dict[str, Any]:
    """Add a signature image by inserting it on a page (PyMuPDF).

    Returns:
        The value produced by ``pdf_tools.add_signature_image``. If that call
        raises an ``Exception``, a dict with an ``"error"`` key is returned
        instead (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword under its own name.
    result = pdf_tools.add_signature_image(
        input_path=input_path,
        output_path=output_path,
        page=page,
        image_path=image_path,
        rect=rect,
    )
    return result
@mcp.tool()
@_handle_errors
def update_signature_image(
    input_path: str,
    output_path: str,
    page: int,
    signature_xref: int,
    image_path: Optional[str] = None,
    rect: Optional[Sequence[float]] = None,
) -> Dict[str, Any]:
    """Update or resize a signature image (PyMuPDF).

    Returns:
        The value produced by ``pdf_tools.update_signature_image``. If that
        call raises an ``Exception``, a dict with an ``"error"`` key is
        returned instead (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword; ``image_path`` and ``rect``
    # are passed through even when left at ``None``.
    result = pdf_tools.update_signature_image(
        input_path=input_path,
        output_path=output_path,
        page=page,
        signature_xref=signature_xref,
        image_path=image_path,
        rect=rect,
    )
    return result
@mcp.tool()
@_handle_errors
def remove_signature_image(
    input_path: str,
    output_path: str,
    page: int,
    signature_xref: int,
) -> Dict[str, Any]:
    """Remove a signature image by xref (PyMuPDF).

    Returns:
        The value produced by ``pdf_tools.remove_signature_image``. If that
        call raises an ``Exception``, a dict with an ``"error"`` key is
        returned instead (via ``_handle_errors``).
    """
    # Every parameter is forwarded by keyword under its own name.
    result = pdf_tools.remove_signature_image(
        input_path=input_path,
        output_path=output_path,
        page=page,
        signature_xref=signature_xref,
    )
    return result
# =============================================================================
# OCR and Text Extraction Tools
# =============================================================================
@mcp.tool()
@_handle_errors
def detect_pdf_type(pdf_path: str) -> Dict[str, Any]:
    """
    Analyze a PDF to classify its content type.

    Returns:
    - classification: "searchable", "image_based", or "hybrid"
    - needs_ocr: Whether OCR is recommended for full text extraction
    - Detailed page-by-page analysis with text/image metrics

    If the underlying ``pdf_tools.detect_pdf_type`` call raises an
    ``Exception``, a dict with an ``"error"`` key is returned instead (via
    ``_handle_errors``).
    """
    # Thin MCP wrapper: the analysis itself is done by ``pdf_tools``.
    result = pdf_tools.detect_pdf_type(pdf_path)
    return result
@mcp.tool()
@_handle_errors
def extract_text_native(
    pdf_path: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """
    Extract text from PDF using native text layer only (no OCR).

    Fast extraction for PDFs with embedded text. Use detect_pdf_type first
    to determine if the PDF has sufficient native text.

    If the underlying ``pdf_tools.extract_text_native`` call raises an
    ``Exception``, a dict with an ``"error"`` key is returned instead (via
    ``_handle_errors``).
    """
    # ``pages`` is forwarded by keyword; ``None`` (the default) is passed as-is.
    result = pdf_tools.extract_text_native(pdf_path, pages=pages)
    return result
@mcp.tool()
@_handle_errors
def extract_text_ocr(
    pdf_path: str,
    pages: Optional[List[int]] = None,
    engine: str = "auto",
    dpi: int = 300,
    language: str = "eng",
) -> Dict[str, Any]:
    """
    Extract text from PDF with OCR support.

    Engine options:
    - "auto": Try native extraction first; fall back to OCR if insufficient
    - "native": Use only native text extraction (no OCR)
    - "tesseract": Force OCR using Tesseract
    - "force_ocr": Always use OCR even if native text exists

    Requires tesseract-ocr to be installed for OCR functionality.

    If the underlying ``pdf_tools.extract_text_ocr`` call raises an
    ``Exception``, a dict with an ``"error"`` key is returned instead (via
    ``_handle_errors``).
    """
    # ``engine`` is not validated here; it and the other options are
    # forwarded by keyword for ``pdf_tools`` to interpret.
    result = pdf_tools.extract_text_ocr(
        pdf_path,
        pages=pages,
        engine=engine,
        dpi=dpi,
        language=language,
    )
    return result
@mcp.tool()
@_handle_errors
def get_pdf_text_blocks(
    pdf_path: str,
    pages: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """
    Extract text blocks with position information from PDF.

    Returns structured text blocks with bounding boxes, useful for
    understanding document layout and identifying form field locations.

    If the underlying ``pdf_tools.get_pdf_text_blocks`` call raises an
    ``Exception``, a dict with an ``"error"`` key is returned instead (via
    ``_handle_errors``).
    """
    # ``pages`` is forwarded by keyword; ``None`` (the default) is passed as-is.
    result = pdf_tools.get_pdf_text_blocks(pdf_path, pages=pages)
    return result
# When this module is executed as the main program, start the MCP server
# using the "stdio" transport.
if __name__ == "__main__":
    mcp.run(transport="stdio")