ThinkDrop Vision Service

Overview Schema Related Servers Score Discussions

ocr_engine.py•5.09 KiB

""" OCR Engine Text extraction using PaddleOCR """ import os import logging import numpy as np import cv2 from PIL import Image from typing import List, Dict, Any logger = logging.getLogger(__name__) class OCREngine: """Handles OCR operations using PaddleOCR""" _instance = None _ocr = None @classmethod def get_instance(cls): """Singleton pattern for OCR engine""" if cls._instance is None: cls._instance = cls() return cls._instance def __init__(self): """Initialize OCR engine (lazy-loaded)""" self._ocr = None def _ensure_loaded(self): """Load OCR model if not already loaded""" if self._ocr is None: try: from paddleocr import PaddleOCR lang = os.getenv('OCR_LANGUAGE', 'en') use_angle_cls = os.getenv('OCR_USE_ANGLE_CLS', 'true').lower() == 'true' logger.info(f"Loading PaddleOCR (lang={lang}, angle_cls={use_angle_cls})...") try: self._ocr = PaddleOCR( use_textline_orientation=use_angle_cls, # Updated parameter name lang=lang ) logger.info(f"PaddleOCR initialized (lang={lang})") except Exception as e: logger.error(f"Failed to initialize PaddleOCR: {e}") raise except Exception as e: logger.error(f"Failed to load PaddleOCR: {e}") raise @staticmethod def is_available() -> bool: """Check if OCR is available""" try: from paddleocr import PaddleOCR return True except ImportError: logger.warning("PaddleOCR not installed") return False def extract_text(self, img: Image.Image, language: str = None) -> List[Dict[str, Any]]: """ Extract text from image Args: img: PIL Image language: Optional language override Returns: List of detected text items with bounding boxes and confidence scores """ self._ensure_loaded() try: # Convert PIL Image to OpenCV format (BGR) img_array = np.array(img) img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) # Run OCR result = self._ocr.ocr(img_bgr) # Parse results items = [] if result and result[0]: for line in result[0]: try: # Handle different result formats if isinstance(line, (list, tuple)) and len(line) >= 2: # line[0] = [[x1,y1], [x2,y2], 
[x3,y3], [x4,y4]] # line[1] = (text, confidence) or just text bbox_points = line[0] # Handle text/confidence tuple or dict if isinstance(line[1], (list, tuple)) and len(line[1]) >= 2: text, confidence = line[1][0], line[1][1] elif isinstance(line[1], dict): text = line[1].get('text', '') confidence = line[1].get('confidence', 0.0) else: text = str(line[1]) confidence = 1.0 # Convert to simple bbox [x1, y1, x2, y2] x_coords = [p[0] for p in bbox_points] y_coords = [p[1] for p in bbox_points] bbox = [ min(x_coords), min(y_coords), max(x_coords), max(y_coords) ] items.append({ "text": text, "bbox": bbox, "confidence": float(confidence) }) except Exception as e: logger.warning(f"Failed to parse OCR line: {e}, line={line}") continue logger.info(f"Extracted {len(items)} text items") return items except Exception as e: logger.error(f"OCR extraction failed: {e}") raise def extract_text_concat(self, img: Image.Image, language: str = None) -> str: """ Extract text and concatenate into single string Args: img: PIL Image language: Optional language override Returns: Concatenated text string """ items = self.extract_text(img, language) return " ".join(item["text"] for item in items)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lukaizhi5559/thinkdrop-vision-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

ocr_engine.py•5.09 KiB