"""OCR core module using macOS Vision Framework."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import objc
import Quartz
import Vision
from PIL import Image
@dataclass
class OCRResult:
    """Result of OCR operation on a single page.

    Plain value object pairing a page's recognized text with its
    position in the document and a quality score.
    """
    # Index of the page this result belongs to — NOTE(review): not
    # constructed in this module; confirm 0- vs 1-based with callers.
    page_number: int
    # Recognized text for the page (as produced by the ocr_* helpers
    # below: one observation per line, joined with newlines).
    text: str
    # Recognition confidence; presumably Vision's 0.0–1.0 candidate
    # confidence — verify against whatever populates this field.
    confidence: float
def ocr_image(image_path: Path, languages: list[str] | None = None) -> str:
    """
    Run the macOS Vision text recognizer over an image file on disk.

    Args:
        image_path: Path to the image file
        languages: Recognition language codes (e.g., ["ko-KR", "en-US"]);
            Korean + English are used when omitted

    Returns:
        Recognized text, one observation per line, ordered top-to-bottom
        then left-to-right.

    Raises:
        ValueError: if the file cannot be loaded as a CIImage
        RuntimeError: if the Vision request itself fails
    """
    # Bridge the filesystem path into a CFURL and load it with CoreImage.
    url = Quartz.CFURLCreateWithFileSystemPath(
        None, str(image_path), Quartz.kCFURLPOSIXPathStyle, False
    )
    source = Quartz.CIImage.imageWithContentsOfURL_(url)
    if source is None:
        raise ValueError(f"Failed to load image: {image_path}")

    # Accurate (slower) recognition pass with language correction on.
    request = Vision.VNRecognizeTextRequest.alloc().init()
    request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
    request.setUsesLanguageCorrection_(True)
    request.setRecognitionLanguages_(languages if languages else ["ko-KR", "en-US"])

    # performRequests_error_ follows the PyObjC out-parameter convention:
    # it returns a (success, error) pair.
    handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(source, None)
    ok, err = handler.performRequests_error_([request], None)
    if not ok:
        detail = err.localizedDescription() if err else "Unknown error"
        raise RuntimeError(f"OCR failed: {detail}")

    observations = request.results()
    if not observations:
        return ""

    # Vision reports bounding boxes in a bottom-left-origin unit square,
    # so flip Y to sort observations into natural reading order.
    def reading_order(obs: objc.objc_object) -> tuple[float, float]:
        box = obs.boundingBox()
        return (1 - box.origin.y - box.size.height, box.origin.x)

    texts = []
    for obs in sorted(observations, key=reading_order):
        candidates = obs.topCandidates_(1)
        if candidates:
            texts.append(candidates[0].string())
    return "\n".join(texts)
def ocr_image_pil(pil_image: Image.Image, languages: list[str] | None = None) -> str:
    """
    Run the macOS Vision text recognizer over an in-memory PIL image.

    Args:
        pil_image: PIL Image object to recognize
        languages: Recognition language codes; Korean + English by default

    Returns:
        Recognized text, one observation per line, ordered top-to-bottom
        then left-to-right.

    Raises:
        ValueError: if the pixels cannot be bridged into a CIImage
        RuntimeError: if the Vision request itself fails
    """
    import io

    # Round-trip the pixels through PNG (lossless) so CoreImage can
    # decode them from an NSData blob.
    png_buffer = io.BytesIO()
    pil_image.save(png_buffer, format="PNG")
    payload = png_buffer.getvalue()
    blob = Quartz.NSData.dataWithBytes_length_(payload, len(payload))
    source = Quartz.CIImage.imageWithData_(blob)
    if source is None:
        raise ValueError("Failed to convert PIL Image to CIImage")

    # Accurate (slower) recognition pass with language correction on.
    request = Vision.VNRecognizeTextRequest.alloc().init()
    request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
    request.setUsesLanguageCorrection_(True)
    request.setRecognitionLanguages_(languages or ["ko-KR", "en-US"])

    # performRequests_error_ returns a (success, error) pair per PyObjC's
    # out-parameter convention.
    handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(source, None)
    ok, err = handler.performRequests_error_([request], None)
    if not ok:
        detail = err.localizedDescription() if err else "Unknown error"
        raise RuntimeError(f"OCR failed: {detail}")

    observations = request.results()
    if not observations:
        return ""

    # Flip Vision's bottom-left-origin Y coordinate so lines sort into
    # natural reading order (top-to-bottom, then left-to-right).
    def reading_order(obs: objc.objc_object) -> tuple[float, float]:
        box = obs.boundingBox()
        return (1 - box.origin.y - box.size.height, box.origin.x)

    texts = []
    for obs in sorted(observations, key=reading_order):
        candidates = obs.topCandidates_(1)
        if candidates:
            texts.append(candidates[0].string())
    return "\n".join(texts)