#!/usr/bin/env python3
"""Benchmark script to compare Vision Framework vs OwlOCR CLI performance.
Uses the same sandbox-safe approach as the Finder Quick Action.
"""
from __future__ import annotations
import argparse
import shutil
import subprocess
import tempfile
import time
from dataclasses import dataclass
from pathlib import Path
import pypdfium2 as pdfium
# OwlOCR locations. Files handed to the CLI are staged inside the app's
# sandbox container temp directory; this is required to avoid the file
# picker dialog.
OWLOCR_SANDBOX_TMP = Path.home().joinpath("Library/Containers/JonLuca-DeCaro.OwlOCR/Data/tmp")
OWLOCR_BINARY = Path("/Applications/OwlOCR.app/Contents/MacOS/OwlOCR")
@dataclass
class BenchmarkResult:
    """Outcome of one benchmark run of a single OCR method over a single PDF.

    The benchmark functions in this file report a failed run with
    ``success=False``, an ``error`` message and zeroed page/text counters.
    """

    # Human-readable backend label, e.g. "Vision Framework (PyObjC)" or "OwlOCR CLI".
    method: str
    # Path of the benchmarked PDF, stored as a string.
    pdf_path: str
    # Number of pages processed; 0 for failed runs.
    total_pages: int
    # Wall-clock duration of the run in seconds (measured with time.perf_counter()).
    total_time_seconds: float
    # total_time_seconds divided by total_pages; 0 when no pages were processed.
    time_per_page_seconds: float
    # Character count of the combined extracted text.
    text_length: int
    # True when the run completed without raising.
    success: bool
    # Failure description (exception text or a not-found message); None on success.
    error: str | None = None
    # Combined OCR text of all pages; left empty for failed runs.
    extracted_text: str = ""
def benchmark_vision_framework(pdf_path: Path, dpi: int = 200) -> BenchmarkResult:
    """Time one OCR pass over a PDF through the ``owlocr_mcp.pdf`` pipeline.

    The backend is imported before the clock starts, so import cost is not
    part of the measurement (and an import failure propagates to the caller).
    Any exception raised while running OCR is captured in a failed result
    instead of being raised.

    Args:
        pdf_path: PDF document to run OCR on.
        dpi: Rendering resolution forwarded to ``PDFOCRConfig``.

    Returns:
        A ``BenchmarkResult`` labelled "Vision Framework (PyObjC)".
    """
    from owlocr_mcp.pdf import PDFOCRConfig, ocr_pdf

    label = "Vision Framework (PyObjC)"
    began = time.perf_counter()
    try:
        text, page_results = ocr_pdf(pdf_path, PDFOCRConfig(dpi=dpi))
        duration = time.perf_counter() - began
        page_count = len(page_results)
        # Guard against an empty result list to avoid dividing by zero.
        per_page = duration / page_count if page_results else 0
        outcome = BenchmarkResult(
            method=label,
            pdf_path=str(pdf_path),
            total_pages=page_count,
            total_time_seconds=duration,
            time_per_page_seconds=per_page,
            text_length=len(text),
            success=True,
            extracted_text=text,
        )
    except Exception as exc:
        duration = time.perf_counter() - began
        outcome = BenchmarkResult(
            method=label,
            pdf_path=str(pdf_path),
            total_pages=0,
            total_time_seconds=duration,
            time_per_page_seconds=0,
            text_length=0,
            success=False,
            error=str(exc),
        )
    return outcome
def _run_owlocr_on_image(image_path: Path, output_dir: Path) -> str:
"""
Run OwlOCR CLI on a single image using sandbox-safe approach.
Copies the image to OwlOCR's sandbox temp directory before processing
to avoid the file picker dialog.
"""
if not OWLOCR_BINARY.exists():
raise FileNotFoundError(f"OwlOCR binary not found: {OWLOCR_BINARY}")
# Ensure sandbox temp directory exists
OWLOCR_SANDBOX_TMP.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)
# Copy input to sandbox temp
tmp_input = OWLOCR_SANDBOX_TMP / image_path.name
shutil.copy2(image_path, tmp_input)
try:
# Run OwlOCR with output redirected to sandbox temp
tmp_output = OWLOCR_SANDBOX_TMP / "output.txt"
tmp_stderr = OWLOCR_SANDBOX_TMP / "stderr.log"
with open(tmp_output, "w") as stdout_file, open(tmp_stderr, "w") as stderr_file:
result = subprocess.run(
[str(OWLOCR_BINARY), "--cli", "--input", str(tmp_input)],
stdout=stdout_file,
stderr=stderr_file,
timeout=60,
)
if result.returncode != 0:
stderr_content = tmp_stderr.read_text() if tmp_stderr.exists() else ""
raise RuntimeError(f"OwlOCR failed ({result.returncode}): {stderr_content}")
# Read output
if tmp_output.exists():
text = tmp_output.read_text(encoding="utf-8", errors="replace").strip()
else:
text = ""
# Move results to output directory
final_output = output_dir / "output.txt"
if tmp_output.exists():
shutil.move(str(tmp_output), str(final_output))
return text
finally:
# Cleanup sandbox temp files
if tmp_input.exists():
tmp_input.unlink()
if (OWLOCR_SANDBOX_TMP / "output.txt").exists():
(OWLOCR_SANDBOX_TMP / "output.txt").unlink()
if (OWLOCR_SANDBOX_TMP / "stderr.log").exists():
(OWLOCR_SANDBOX_TMP / "stderr.log").unlink()
def benchmark_owlocr_cli(pdf_path: Path, dpi: int = 200) -> BenchmarkResult:
    """Benchmark the OwlOCR CLI using the sandbox-safe approach.

    Each page of the PDF is rendered to a PNG in a temporary work directory
    and passed to ``_run_owlocr_on_image``. Page texts are concatenated, with
    a ``===== Page N =====`` separator before every page after the first.
    Errors are captured in a failed result instead of being raised.

    Args:
        pdf_path: PDF document to run OCR on.
        dpi: Rendering resolution for the page images.

    Returns:
        A ``BenchmarkResult`` labelled "OwlOCR CLI".
    """
    method = "OwlOCR CLI"

    def failed(error: str, elapsed: float) -> BenchmarkResult:
        # Failed runs carry zeroed counters so they can still be printed.
        return BenchmarkResult(
            method=method,
            pdf_path=str(pdf_path),
            total_pages=0,
            total_time_seconds=elapsed,
            time_per_page_seconds=0,
            text_length=0,
            success=False,
            error=error,
        )

    if not OWLOCR_BINARY.exists():
        return failed(f"OwlOCR.app not found at {OWLOCR_BINARY}", 0)

    start_time = time.perf_counter()
    try:
        # pdfium expresses resolution as a scale factor relative to 72 DPI.
        # Keep it fractional: truncating to int would render 200 DPI as 144
        # DPI and turn anything below 72 DPI into a scale of 0.
        scale = dpi / 72
        all_text_parts = []

        pdf_doc = pdfium.PdfDocument(str(pdf_path))
        try:
            total_pages = len(pdf_doc)
            work_dir = Path(tempfile.mkdtemp(prefix="owlocr_benchmark_"))
            try:
                pages_png_dir = work_dir / "pages_png"
                ocr_output_dir = work_dir / "ocr"
                pages_png_dir.mkdir(parents=True, exist_ok=True)

                for page_idx in range(total_pages):
                    page_num = page_idx + 1

                    # Render the page to a PNG the CLI can read.
                    bitmap = pdf_doc[page_idx].render(scale=scale)
                    png_path = pages_png_dir / f"page_{page_num:04d}.png"
                    bitmap.to_pil().save(png_path)

                    # Run OwlOCR CLI (sandbox-safe).
                    page_output_dir = ocr_output_dir / f"page_{page_num:04d}"
                    page_text = _run_owlocr_on_image(png_path, page_output_dir)

                    if page_idx > 0:
                        all_text_parts.append(f"\n\n===== Page {page_num} =====\n\n")
                    all_text_parts.append(page_text)
            finally:
                shutil.rmtree(work_dir, ignore_errors=True)
        finally:
            # Release the native document handle even when a page fails.
            pdf_doc.close()

        combined_text = "".join(all_text_parts)
        elapsed = time.perf_counter() - start_time
        return BenchmarkResult(
            method=method,
            pdf_path=str(pdf_path),
            total_pages=total_pages,
            total_time_seconds=elapsed,
            time_per_page_seconds=elapsed / total_pages if total_pages else 0,
            text_length=len(combined_text),
            success=True,
            extracted_text=combined_text,
        )
    except Exception as e:
        return failed(str(e), time.perf_counter() - start_time)
def print_result(result: BenchmarkResult, show_text: bool = False) -> None:
    """Write a human-readable report for one benchmark result to stdout.

    Args:
        result: The benchmark outcome to report.
        show_text: When true (and the run succeeded), also print the first
            500 characters of the extracted text.
    """
    rule = "=" * 60
    print("", rule, f"Method: {result.method}", rule, sep="\n")

    # Failed runs only report their error message.
    if not result.success:
        print(f" ❌ FAILED: {result.error}")
        return

    print(
        f" PDF: {result.pdf_path}",
        f" Pages: {result.total_pages}",
        f" Total Time: {result.total_time_seconds:.2f} seconds",
        f" Time/Page: {result.time_per_page_seconds:.2f} seconds",
        f" Text Length: {result.text_length:,} characters",
        sep="\n",
    )
    if show_text:
        preview = result.extracted_text[:500]
        print("\n --- Extracted Text (first 500 chars) ---")
        print(f" {preview}...")
def _write_first_pages(pdf_path: Path, max_pages: int, out_dir: Path) -> Path:
    """Return a PDF holding only the first *max_pages* pages of *pdf_path*.

    The original path is returned unchanged when the document has no more
    than *max_pages* pages; otherwise a trimmed copy is written to *out_dir*.
    """
    source = pdfium.PdfDocument(str(pdf_path))
    try:
        if len(source) <= max_pages:
            return pdf_path
        trimmed = pdfium.PdfDocument.new()
        try:
            trimmed.import_pages(source, pages=list(range(max_pages)))
            trimmed_path = out_dir / pdf_path.name
            trimmed.save(str(trimmed_path))
        finally:
            trimmed.close()
        return trimmed_path
    finally:
        source.close()


def _print_comparison(vision_result: BenchmarkResult, owlocr_result: BenchmarkResult) -> None:
    """Print the timing and text-length comparison of two successful runs."""
    print(f"\n{'=' * 60}")
    print("📊 COMPARISON SUMMARY")
    print(f"{'=' * 60}")

    vision_time = vision_result.total_time_seconds
    owlocr_time = owlocr_result.total_time_seconds
    faster = "Vision Framework" if vision_time < owlocr_time else "OwlOCR CLI"
    quickest = min(vision_time, owlocr_time)
    # A zero duration would otherwise crash the summary with a division error.
    speedup = max(vision_time, owlocr_time) / quickest if quickest > 0 else float("inf")
    print(f" Winner: {faster} ({speedup:.1f}x faster)")
    print(f" Vision Framework: {vision_time:.2f}s")
    print(f" OwlOCR CLI: {owlocr_time:.2f}s")

    vision_len = vision_result.text_length
    owlocr_len = owlocr_result.text_length
    diff = abs(vision_len - owlocr_len)
    longest = max(vision_len, owlocr_len)
    # Both backends can legitimately return no text (e.g. blank pages).
    diff_pct = diff / longest * 100 if longest else 0.0
    print("\n Text Output:")
    print(f" Vision Framework: {vision_len:,} chars")
    print(f" OwlOCR CLI: {owlocr_len:,} chars")
    print(f" Difference: {diff:,} chars ({diff_pct:.1f}%)")


def main() -> int:
    """Parse the command line, run the selected benchmarks and print results.

    When ``--max-pages`` is given, both backends are run against a temporary
    copy of the PDF trimmed to its first N pages, so they process the same
    input. The temporary copy is removed before returning.

    Returns:
        Exit status: 1 when the PDF does not exist or cannot be trimmed,
        otherwise 0. Invalid ``--max-pages`` values exit through argparse.
    """
    parser = argparse.ArgumentParser(
        description="Benchmark Vision Framework vs OwlOCR CLI for PDF OCR"
    )
    parser.add_argument("pdf_path", type=Path, help="Path to PDF file to benchmark")
    parser.add_argument("--dpi", type=int, default=200, help="DPI for rendering (default: 200)")
    parser.add_argument(
        "--method",
        choices=["both", "vision", "owlocr"],
        default="both",
        help="Which method to benchmark (default: both)",
    )
    parser.add_argument(
        "--show-text",
        action="store_true",
        help="Show extracted text preview",
    )
    parser.add_argument(
        "--max-pages",
        type=int,
        default=None,
        help="Process only first N pages (for quick testing)",
    )
    args = parser.parse_args()

    if not args.pdf_path.exists():
        print(f"Error: PDF file not found: {args.pdf_path}")
        return 1
    if args.max_pages is not None and args.max_pages < 1:
        parser.error("--max-pages must be a positive integer")

    print("\n🔍 Benchmarking PDF OCR")
    print(f" File: {args.pdf_path}")
    print(f" DPI: {args.dpi}")
    if args.max_pages is not None:
        print(f" Max Pages: {args.max_pages}")

    trim_dir = None
    try:
        target_pdf = args.pdf_path
        if args.max_pages is not None:
            trim_dir = Path(tempfile.mkdtemp(prefix="owlocr_benchmark_pdf_"))
            try:
                target_pdf = _write_first_pages(args.pdf_path, args.max_pages, trim_dir)
            except Exception as exc:
                print(f"Error: could not limit PDF to first {args.max_pages} pages: {exc}")
                return 1

        vision_result = None
        owlocr_result = None

        if args.method in ("both", "vision"):
            print("\n⏳ Running Vision Framework benchmark...")
            vision_result = benchmark_vision_framework(target_pdf, args.dpi)
            print_result(vision_result, args.show_text)

        if args.method in ("both", "owlocr"):
            print("\n⏳ Running OwlOCR CLI benchmark...")
            owlocr_result = benchmark_owlocr_cli(target_pdf, args.dpi)
            print_result(owlocr_result, args.show_text)

        # The comparison only makes sense when both backends ran and succeeded.
        if (
            vision_result is not None
            and owlocr_result is not None
            and vision_result.success
            and owlocr_result.success
        ):
            _print_comparison(vision_result, owlocr_result)
    finally:
        if trim_dir is not None:
            shutil.rmtree(trim_dir, ignore_errors=True)

    return 0
if __name__ == "__main__":
    # ``exit`` is a convenience added by the ``site`` module for interactive
    # sessions and is missing under ``python -S`` or in frozen builds;
    # raising SystemExit always works and carries main()'s status code.
    raise SystemExit(main())