OwlOCR MCP

benchmark.py•10.2 KiB

#!/usr/bin/env python3
"""Benchmark script to compare Vision Framework vs OwlOCR CLI performance.

Uses the same sandbox-safe approach as the Finder Quick Action.
"""

from __future__ import annotations

import argparse
import contextlib
import shutil
import subprocess
import sys
import tempfile
import time
from dataclasses import dataclass
from pathlib import Path

# NOTE: pypdfium2 is imported inside the functions that render or rewrite PDFs
# (mirroring the lazy ``owlocr_mcp`` import below) so that ``--help`` and the
# argument/error paths work even when the renderer is not installed.

# OwlOCR sandbox paths (required to avoid file picker dialog)
OWLOCR_BINARY = Path("/Applications/OwlOCR.app/Contents/MacOS/OwlOCR")
OWLOCR_SANDBOX_TMP = Path.home() / "Library/Containers/JonLuca-DeCaro.OwlOCR/Data/tmp"

# Display names used in results and in the comparison summary.
_VISION_METHOD = "Vision Framework (PyObjC)"
_OWLOCR_METHOD = "OwlOCR CLI"

# Number of characters of extracted text shown by ``--show-text``.
_PREVIEW_CHARS = 500


@dataclass
class BenchmarkResult:
    """Result of a single benchmark run."""

    method: str
    pdf_path: str
    total_pages: int
    total_time_seconds: float
    time_per_page_seconds: float
    text_length: int
    success: bool
    error: str | None = None
    extracted_text: str = ""


def _failed_result(method: str, pdf_path: Path, elapsed: float, error: str) -> BenchmarkResult:
    """Build the result reported when a benchmark could not complete."""
    return BenchmarkResult(
        method=method,
        pdf_path=str(pdf_path),
        total_pages=0,
        total_time_seconds=elapsed,
        time_per_page_seconds=0,
        text_length=0,
        success=False,
        error=error,
    )


def benchmark_vision_framework(pdf_path: Path, dpi: int = 200) -> BenchmarkResult:
    """Benchmark using macOS Vision Framework (PyObjC).

    Args:
        pdf_path: PDF file to OCR.
        dpi: Rendering DPI handed to ``PDFOCRConfig``.

    Returns:
        A ``BenchmarkResult``; on any exception ``success`` is False and
        ``error`` carries the exception message.
    """
    # Imported before the timer starts so import cost is not measured.
    from owlocr_mcp.pdf import PDFOCRConfig, ocr_pdf

    start_time = time.perf_counter()
    try:
        config = PDFOCRConfig(dpi=dpi)
        # presumably ``results`` holds one entry per page; verify against ocr_pdf
        combined_text, results = ocr_pdf(pdf_path, config)
        elapsed = time.perf_counter() - start_time
        return BenchmarkResult(
            method=_VISION_METHOD,
            pdf_path=str(pdf_path),
            total_pages=len(results),
            total_time_seconds=elapsed,
            time_per_page_seconds=elapsed / len(results) if results else 0,
            text_length=len(combined_text),
            success=True,
            extracted_text=combined_text,
        )
    except Exception as e:  # benchmark boundary: report the failure, don't crash
        elapsed = time.perf_counter() - start_time
        return _failed_result(_VISION_METHOD, pdf_path, elapsed, str(e))


def _run_owlocr_on_image(image_path: Path, output_dir: Path) -> str:
    """Run OwlOCR CLI on a single image using sandbox-safe approach.

    Copies the image to OwlOCR's sandbox temp directory before processing to
    avoid the file picker dialog.

    Args:
        image_path: Image to OCR.
        output_dir: Directory that receives the CLI's ``output.txt``.

    Returns:
        The recognized text, stripped of surrounding whitespace.

    Raises:
        FileNotFoundError: If the OwlOCR binary is missing.
        RuntimeError: If the CLI exits with a non-zero status.
        subprocess.TimeoutExpired: If the CLI runs longer than 60 seconds.
    """
    if not OWLOCR_BINARY.exists():
        raise FileNotFoundError(f"OwlOCR binary not found: {OWLOCR_BINARY}")

    # Ensure sandbox temp directory exists
    OWLOCR_SANDBOX_TMP.mkdir(parents=True, exist_ok=True)
    output_dir.mkdir(parents=True, exist_ok=True)

    tmp_input = OWLOCR_SANDBOX_TMP / image_path.name
    tmp_output = OWLOCR_SANDBOX_TMP / "output.txt"
    tmp_stderr = OWLOCR_SANDBOX_TMP / "stderr.log"

    # Copy input to sandbox temp
    shutil.copy2(image_path, tmp_input)
    try:
        # Run OwlOCR with output redirected to sandbox temp
        with open(tmp_output, "w") as stdout_file, open(tmp_stderr, "w") as stderr_file:
            result = subprocess.run(
                [str(OWLOCR_BINARY), "--cli", "--input", str(tmp_input)],
                stdout=stdout_file,
                stderr=stderr_file,
                timeout=60,
            )

        if result.returncode != 0:
            # errors="replace": a stray non-UTF-8 byte in stderr must not hide
            # the real failure behind a UnicodeDecodeError.
            stderr_content = (
                tmp_stderr.read_text(encoding="utf-8", errors="replace")
                if tmp_stderr.exists()
                else ""
            )
            raise RuntimeError(f"OwlOCR failed ({result.returncode}): {stderr_content}")

        # Read output
        if tmp_output.exists():
            text = tmp_output.read_text(encoding="utf-8", errors="replace").strip()
        else:
            text = ""

        # Move results to output directory
        final_output = output_dir / "output.txt"
        if tmp_output.exists():
            shutil.move(str(tmp_output), str(final_output))

        return text
    finally:
        # Cleanup sandbox temp files (any of them may already be gone)
        for leftover in (tmp_input, tmp_output, tmp_stderr):
            with contextlib.suppress(FileNotFoundError):
                leftover.unlink()


def benchmark_owlocr_cli(pdf_path: Path, dpi: int = 200) -> BenchmarkResult:
    """Benchmark using OwlOCR CLI with sandbox-safe approach.

    Each page is rendered to a PNG and passed to the OwlOCR CLI; the page
    texts are joined with ``===== Page N =====`` separators.

    Args:
        pdf_path: PDF file to OCR.
        dpi: Rendering DPI (PDF user space is 72 units per inch).

    Returns:
        A ``BenchmarkResult``; on any exception, or when the OwlOCR binary is
        missing, ``success`` is False and ``error`` explains why.
    """
    if not OWLOCR_BINARY.exists():
        return _failed_result(
            _OWLOCR_METHOD, pdf_path, 0, f"OwlOCR.app not found at {OWLOCR_BINARY}"
        )

    # Imported before the timer starts so import cost is not measured.
    import pypdfium2 as pdfium

    start_time = time.perf_counter()
    try:
        # Load PDF and count pages
        pdf_doc = pdfium.PdfDocument(str(pdf_path))
        total_pages = len(pdf_doc)
        # Keep the scale fractional: truncating 200/72 = 2.78 to 2 would render
        # at 144 DPI and skew the comparison against the Vision path.
        scale = dpi / 72

        all_text_parts = []
        work_dir = Path(tempfile.mkdtemp(prefix="owlocr_benchmark_"))
        try:
            pages_png_dir = work_dir / "pages_png"
            ocr_output_dir = work_dir / "ocr"
            pages_png_dir.mkdir(parents=True, exist_ok=True)

            for page_idx in range(total_pages):
                page_num = page_idx + 1

                # Render page to PNG
                page = pdf_doc[page_idx]
                bitmap = page.render(scale=scale)
                pil_image = bitmap.to_pil()
                png_path = pages_png_dir / f"page_{page_num:04d}.png"
                pil_image.save(png_path)

                # Run OwlOCR CLI (sandbox-safe)
                page_output_dir = ocr_output_dir / f"page_{page_num:04d}"
                page_text = _run_owlocr_on_image(png_path, page_output_dir)

                if page_idx > 0:
                    all_text_parts.append(f"\n\n===== Page {page_num} =====\n\n")
                all_text_parts.append(page_text)
        finally:
            shutil.rmtree(work_dir, ignore_errors=True)

        combined_text = "".join(all_text_parts)
        elapsed = time.perf_counter() - start_time
        return BenchmarkResult(
            method=_OWLOCR_METHOD,
            pdf_path=str(pdf_path),
            total_pages=total_pages,
            total_time_seconds=elapsed,
            time_per_page_seconds=elapsed / total_pages if total_pages else 0,
            text_length=len(combined_text),
            success=True,
            extracted_text=combined_text,
        )
    except Exception as e:  # benchmark boundary: report the failure, don't crash
        elapsed = time.perf_counter() - start_time
        return _failed_result(_OWLOCR_METHOD, pdf_path, elapsed, str(e))


def print_result(result: BenchmarkResult, show_text: bool = False) -> None:
    """Print benchmark result in a formatted way.

    Args:
        result: The benchmark outcome to print.
        show_text: When True, also print a preview of the extracted text.
    """
    print(f"\n{'=' * 60}")
    print(f"Method: {result.method}")
    print(f"{'=' * 60}")

    if not result.success:
        print(f" ❌ FAILED: {result.error}")
        return

    print(f" PDF: {result.pdf_path}")
    print(f" Pages: {result.total_pages}")
    print(f" Total Time: {result.total_time_seconds:.2f} seconds")
    print(f" Time/Page: {result.time_per_page_seconds:.2f} seconds")
    print(f" Text Length: {result.text_length:,} characters")
    if show_text:
        # Only mark the preview as truncated when text was actually cut off.
        ellipsis = "..." if len(result.extracted_text) > _PREVIEW_CHARS else ""
        print("\n --- Extracted Text (first 500 chars) ---")
        print(f" {result.extracted_text[:_PREVIEW_CHARS]}{ellipsis}")


def print_comparison(vision_result: BenchmarkResult, owlocr_result: BenchmarkResult) -> None:
    """Print the side-by-side timing and text-length summary.

    Args:
        vision_result: Successful Vision Framework result.
        owlocr_result: Successful OwlOCR CLI result.
    """
    print(f"\n{'=' * 60}")
    print("📊 COMPARISON SUMMARY")
    print(f"{'=' * 60}")

    vision_time = vision_result.total_time_seconds
    owlocr_time = owlocr_result.total_time_seconds
    faster = "Vision Framework" if vision_time < owlocr_time else "OwlOCR CLI"
    quickest = min(vision_time, owlocr_time)
    if quickest > 0:
        speedup = max(vision_time, owlocr_time) / quickest
        print(f" Winner: {faster} ({speedup:.1f}x faster)")
    else:
        # A zero duration makes the ratio undefined; still name the winner.
        print(f" Winner: {faster}")
    print(f" Vision Framework: {vision_time:.2f}s")
    print(f" OwlOCR CLI: {owlocr_time:.2f}s")

    # Text length comparison
    vision_len = vision_result.text_length
    owlocr_len = owlocr_result.text_length
    diff = abs(vision_len - owlocr_len)
    longest = max(vision_len, owlocr_len)
    # Both engines may return no text (blank pages); avoid dividing by zero.
    diff_percent = diff / longest * 100 if longest else 0.0
    print("\n Text Output:")
    print(f" Vision Framework: {vision_len:,} chars")
    print(f" OwlOCR CLI: {owlocr_len:,} chars")
    print(f" Difference: {diff:,} chars ({diff_percent:.1f}%)")


def _write_first_pages(pdf_path: Path, max_pages: int, dest_dir: Path) -> Path:
    """Write a copy of *pdf_path* holding only its first *max_pages* pages.

    Both benchmarks then read the same shortened file, so ``--max-pages``
    limits them identically.
    """
    import pypdfium2 as pdfium

    source = pdfium.PdfDocument(str(pdf_path))
    keep = min(max_pages, len(source))
    subset = pdfium.PdfDocument.new()
    subset.import_pages(source, pages=list(range(keep)))
    subset_path = dest_dir / pdf_path.name
    subset.save(str(subset_path))
    return subset_path


def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments, run the selected benchmarks, print results.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on completion, 1 when the PDF is missing or
        cannot be limited to ``--max-pages``.
    """
    parser = argparse.ArgumentParser(
        description="Benchmark Vision Framework vs OwlOCR CLI for PDF OCR"
    )
    parser.add_argument("pdf_path", type=Path, help="Path to PDF file to benchmark")
    parser.add_argument("--dpi", type=int, default=200, help="DPI for rendering (default: 200)")
    parser.add_argument(
        "--method",
        choices=["both", "vision", "owlocr"],
        default="both",
        help="Which method to benchmark (default: both)",
    )
    parser.add_argument(
        "--show-text",
        action="store_true",
        help="Show extracted text preview",
    )
    parser.add_argument(
        "--max-pages",
        type=int,
        default=None,
        help="Process only first N pages (for quick testing)",
    )
    args = parser.parse_args(argv)

    if args.max_pages is not None and args.max_pages < 1:
        parser.error("--max-pages must be a positive integer")

    if not args.pdf_path.exists():
        print(f"Error: PDF file not found: {args.pdf_path}")
        return 1

    print("\n🔍 Benchmarking PDF OCR")
    print(f" File: {args.pdf_path}")
    print(f" DPI: {args.dpi}")
    if args.max_pages:
        print(f" Max Pages: {args.max_pages}")

    results = []
    # The ExitStack owns the scratch directory (if any) for the whole run.
    with contextlib.ExitStack() as stack:
        target_pdf = args.pdf_path
        if args.max_pages is not None:
            scratch = Path(
                stack.enter_context(tempfile.TemporaryDirectory(prefix="owlocr_benchmark_"))
            )
            try:
                target_pdf = _write_first_pages(args.pdf_path, args.max_pages, scratch)
            except Exception as e:
                print(f"Error: could not limit PDF to {args.max_pages} pages: {e}")
                return 1

        if args.method in ("both", "vision"):
            print("\n⏳ Running Vision Framework benchmark...")
            result = benchmark_vision_framework(target_pdf, args.dpi)
            # Report the user's file, not the temporary page-limited copy.
            result.pdf_path = str(args.pdf_path)
            results.append(result)
            print_result(result, args.show_text)

        if args.method in ("both", "owlocr"):
            print("\n⏳ Running OwlOCR CLI benchmark...")
            result = benchmark_owlocr_cli(target_pdf, args.dpi)
            result.pdf_path = str(args.pdf_path)
            results.append(result)
            print_result(result, args.show_text)

    # Summary comparison (only meaningful when both methods ran and succeeded)
    if len(results) == 2 and all(r.success for r in results):
        print_comparison(results[0], results[1])

    return 0


if __name__ == "__main__":
    sys.exit(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jangisaac-dev/owlocr-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

benchmark.py•10.2 KiB