KSJ MCP Server

Overview Schema Related Servers Score Discussions

ksj-mcp
src
ksj_mcp

ocr.py•4.34 KiB

""" OCR layer for KSJ MCP server. Wraps pytesseract with a clear, actionable error when Tesseract is not installed. All public functions raise OcrNotAvailableError rather than letting a cryptic pytesseract error bubble up to Claude Desktop. """ import re from pathlib import Path class OcrNotAvailableError(RuntimeError): """Raised when Tesseract OCR binary is not installed or not found on PATH.""" INSTALL_GUIDE = ( "Tesseract OCR is not installed or not on your PATH.\n\n" "Install instructions:\n" " Windows : https://github.com/UB-Mannheim/tesseract/wiki\n" " (download the installer, accept default PATH option)\n" " macOS : brew install tesseract\n" " Linux : sudo apt install tesseract-ocr # or equivalent\n\n" "After installing, restart Claude Desktop so the updated PATH is picked up.\n" "Then try uploading your capture again." ) def __init__(self, original: Exception | None = None): detail = f" ({original})" if original else "" super().__init__(self.INSTALL_GUIDE + detail) # ── Internal helpers ────────────────────────────────────────────────────────── def _import_tesseract(): """Lazy import so the server starts even when pytesseract is installed but the Tesseract binary itself is absent.""" try: import pytesseract return pytesseract except ImportError as e: raise OcrNotAvailableError(e) from e def _run_ocr(image_path: Path) -> tuple[str, float]: """Run Tesseract on *image_path*, return (text, confidence 0-1).""" pytesseract = _import_tesseract() try: from PIL import Image img = Image.open(image_path) # Attempt to get per-word confidence data data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT) text = pytesseract.image_to_string(img) # Average confidence of words with conf > 0 confs = [c for c in data["conf"] if isinstance(c, (int, float)) and c >= 0] confidence = (sum(confs) / len(confs) / 100.0) if confs else 0.0 return text, confidence except pytesseract.TesseractNotFoundError as e: raise OcrNotAvailableError(e) from e except Exception as e: # Surface unexpected errors clearly raise RuntimeError(f"OCR failed on {image_path}: {e}") from e # ── Template detection ──────────────────────────────────────────────────────── _TEMPLATE_PATTERNS = [ (re.compile(r'\bRC-(\d{3})\b', re.IGNORECASE), "RC"), (re.compile(r'\bSYN-(\d{3})\b', re.IGNORECASE), "SYN"), (re.compile(r'\bREV-(\d{3})\b', re.IGNORECASE), "REV"), (re.compile(r'\bDC-(\d{3})\b', re.IGNORECASE), "DC"), ] def detect_template_type(text: str) -> tuple[str, str]: """ Scan OCR text for a template ID (e.g. RC-001). Returns (template_type, template_id) or ("UNKNOWN", "") if not found. """ for pattern, ttype in _TEMPLATE_PATTERNS: m = pattern.search(text) if m: return ttype, f"{ttype}-{m.group(1)}" return "UNKNOWN", "" # ── Public API ──────────────────────────────────────────────────────────────── def extract_text(image_path: str | Path) -> dict: """ Run OCR on *image_path* and return a structured result dict. Returns: { "raw_text": str, "template_type": str, # RC | SYN | REV | DC | UNKNOWN "template_id": str, # e.g. RC-001 (empty if unknown) "confidence": float, # 0.0 – 1.0 } Raises: OcrNotAvailableError if Tesseract is missing FileNotFoundError if image_path does not exist """ path = Path(image_path) if not path.exists(): raise FileNotFoundError(f"Image not found: {path}") raw_text, confidence = _run_ocr(path) template_type, template_id = detect_template_type(raw_text) return { "raw_text": raw_text, "template_type": template_type, "template_id": template_id, "confidence": round(confidence, 3), }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ChavezAILabs/ksj-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

ocr.py•4.34 KiB