OwlOCR MCP

Overview Schema Related Servers Score Discussions

owlocr-mcp
src
owlocr_mcp

pdf.py•3.34 KiB

"""PDF processing module for page-by-page OCR.""" from __future__ import annotations from dataclasses import dataclass from pathlib import Path import pypdfium2 as pdfium from PIL import Image from .ocr import OCRResult, ocr_image_pil @dataclass class PDFOCRConfig: """Configuration for PDF OCR.""" dpi: int = 200 languages: list[str] | None = None page_separator: str = "\n\n===== Page {page} =====\n\n" def render_pdf_page_to_image( pdf_doc: pdfium.PdfDocument, page_index: int, dpi: int = 200 ) -> Image.Image: """ Render a single PDF page to a PIL Image. Args: pdf_doc: pypdfium2 PDF document page_index: 0-based page index dpi: Resolution for rendering Returns: PIL Image of the rendered page """ page = pdf_doc[page_index] scale = dpi / 72 # PDF default is 72 DPI bitmap = page.render(scale=scale) pil_image = bitmap.to_pil() return pil_image def ocr_pdf( pdf_path: Path, config: PDFOCRConfig | None = None, pages: list[int] | None = None, ) -> tuple[str, list[OCRResult]]: """ Perform OCR on a PDF file, extracting text from all or specific pages. Args: pdf_path: Path to the PDF file config: OCR configuration options pages: Optional list of 1-based page numbers to process. If None, all pages are processed. 
Returns: Tuple of (combined_text, list of OCRResult per page) """ if config is None: config = PDFOCRConfig() pdf_doc = pdfium.PdfDocument(str(pdf_path)) total_pages = len(pdf_doc) # Determine which pages to process (convert to 0-based indices) if pages is not None: page_indices = [p - 1 for p in pages if 1 <= p <= total_pages] else: page_indices = list(range(total_pages)) results: list[OCRResult] = [] text_parts: list[str] = [] for idx in page_indices: page_num = idx + 1 # 1-based for display # Render page to image pil_image = render_pdf_page_to_image(pdf_doc, idx, config.dpi) # Perform OCR page_text = ocr_image_pil(pil_image, config.languages) results.append( OCRResult( page_number=page_num, text=page_text, confidence=1.0, # Vision framework doesn't expose overall confidence ) ) # Add separator before text (except for first page) if text_parts: text_parts.append(config.page_separator.format(page=page_num)) text_parts.append(page_text) combined_text = "".join(text_parts) return combined_text, results def ocr_pdf_to_file( pdf_path: Path, output_path: Path | None = None, config: PDFOCRConfig | None = None, pages: list[int] | None = None, ) -> Path: """ Perform OCR on a PDF file and save the result to a text file. Args: pdf_path: Path to the PDF file output_path: Optional output file path. If None, creates .txt next to PDF. config: OCR configuration options pages: Optional list of 1-based page numbers to process Returns: Path to the created text file """ if output_path is None: output_path = pdf_path.with_suffix(".txt") combined_text, _ = ocr_pdf(pdf_path, config, pages) output_path.write_text(combined_text, encoding="utf-8") return output_path

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jangisaac-dev/owlocr-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

pdf.py•3.34 KiB