OCR MCP Service

Overview Schema Related Servers Score Discussions

ocr-mcp-service
src
ocr_mcp_service

tools.py•19.6 kB

"""MCP tool definitions."""

import importlib.resources
import re
from datetime import datetime
from pathlib import Path
from typing import Optional

from .mcp_server import mcp
from .ocr_engine import OCREngineFactory
from .utils import validate_image, with_timeout
from .logger import get_logger
from .prompt_loader import get_scenario_template
from .config import OCR_TIMEOUT, get_timeout_for_image


def _recognize_with_engine(engine_type: str, image_path: str, **kwargs):
    """Internal function to recognize image with timeout protection.

    Uses a dynamic timeout: the timeout is obtained per image from
    ``get_timeout_for_image`` (adjusted to the image size) instead of being
    a fixed constant.

    Args:
        engine_type: Type of OCR engine
        image_path: Path to image file
        **kwargs: Additional arguments for engine recognition

    Returns:
        OCRResult object
    """
    # Choose the timeout dynamically for this particular image.
    timeout = get_timeout_for_image(image_path)

    @with_timeout(timeout)
    def _do_recognize():
        validate_image(image_path)

        # EasyOCR needs its language list when the engine is created, so it
        # is removed from the kwargs forwarded to recognize_image().
        if engine_type == "easyocr" and "languages" in kwargs:
            languages = kwargs.pop("languages")
            engine = OCREngineFactory.get_engine(engine_type, languages=languages)
        else:
            engine = OCREngineFactory.get_engine(engine_type)

        return engine.recognize_image(image_path, **kwargs)

    return _do_recognize()


def _error_result(engine: str, error_msg: str, error_type: str,
                  error_recovery: Optional[str] = None) -> dict:
    """Build the error payload shared by all recognition tools.

    The payload carries the same result keys as a successful recognition
    (with empty values) so callers can always read them.
    ``error_recovery`` is included only when a hint is supplied.
    """
    result = {"error": error_msg, "error_type": error_type}
    if error_recovery is not None:
        result["error_recovery"] = error_recovery
    result.update({
        "text": "",
        "boxes": [],
        "confidence": 0.0,
        "engine": engine,
        "processing_time": 0.0,
    })
    return result


def _log_recognition_success(logger, tool_name: str, result_dict: dict) -> None:
    """Log a one-line summary of a successful recognition result."""
    text_length = len(result_dict.get("text", ""))
    boxes_count = len(result_dict.get("boxes", []))
    confidence = result_dict.get("confidence", 0.0)
    processing_time = result_dict.get("processing_time", 0.0)
    logger.info(
        f"MCP工具调用成功: {tool_name}, "
        f"识别文本长度: {text_length}字符, "
        f"文本块数量: {boxes_count}, "
        f"平均置信度: {confidence:.2f}, "
        f"处理时间: {processing_time:.2f}秒"
    )


def _timeout_result(logger, tool_name: str, engine: str, image_path: str,
                    advice: str, recovery: str) -> dict:
    """Log a recognition timeout and return the matching error payload.

    ``advice`` is the engine-specific text appended after the common
    "timed out after N seconds" prefix of the message.
    """
    timeout = get_timeout_for_image(image_path)
    error_msg = f"OCR处理超时（超过{timeout}秒）。{advice}"
    logger.error(f"MCP工具调用超时: {tool_name}, {error_msg}")
    return _error_result(engine, error_msg, "TimeoutError", recovery)


def _failure_result(logger, tool_name: str, engine: str, exc: Exception) -> dict:
    """Log an unexpected recognition failure and return its error payload.

    Must be called from inside the ``except`` block handling ``exc`` so that
    ``exc_info=True`` can attach the active traceback to the log record.
    """
    error_msg = f"OCR处理失败: {exc}"
    logger.error(f"MCP工具调用失败: {tool_name}, {error_msg}", exc_info=True)
    return _error_result(engine, error_msg, type(exc).__name__)


@mcp.tool()
def recognize_image_paddleocr(image_path: str, lang: str = "ch") -> dict:
    """
    Recognize text in an image using PaddleOCR engine.

    This tool performs OCR recognition and returns the recognized text, bounding boxes,
    confidence scores, and technical analysis.

    For prompt templates/examples for image analysis, use the get_prompt_template tool separately.

    Args:
        image_path: Path to the image file
        lang: Language code (default: 'ch' for Chinese)

    Returns:
        OCR result dictionary containing:
        - text: Recognized text content
        - boxes: Bounding boxes for text regions
        - confidence: Average confidence score
        - engine: OCR engine name
        - processing_time: Processing time in seconds
        - analysis: Technical analysis (optional)
    """
    tool_name = "recognize_image_paddleocr"
    logger = get_logger("tools.recognize_image_paddleocr")
    try:
        logger.info(f"MCP工具调用开始: recognize_image_paddleocr, 图片路径: {image_path}, 语言: {lang}")

        # Recognize with timeout protection
        result = _recognize_with_engine("paddleocr", image_path, lang=lang)

        result_dict = result.to_dict()
        _log_recognition_success(logger, tool_name, result_dict)
        return result_dict
    except TimeoutError:
        return _timeout_result(
            logger, tool_name, "paddleocr", image_path,
            "图片可能过大或过于复杂。建议：1) 尝试压缩图片 2) 使用更快的引擎 3) 分批处理大图片",
            "尝试压缩图片或使用更快的引擎",
        )
    except FileNotFoundError as e:
        error_msg = f"图片文件未找到: {e}"
        logger.error(f"MCP工具调用失败: recognize_image_paddleocr, {error_msg}")
        return _error_result("paddleocr", error_msg, "FileNotFoundError")
    except ValueError as e:
        error_msg = f"图片验证失败: {e}"
        logger.error(f"MCP工具调用失败: recognize_image_paddleocr, {error_msg}")
        return _error_result("paddleocr", error_msg, "ValueError")
    except Exception as e:
        return _failure_result(logger, tool_name, "paddleocr", e)


@mcp.tool()
def recognize_image_deepseek(image_path: str) -> dict:
    """
    Recognize text in an image using DeepSeek OCR engine.

    NOTE: This engine is NOT RECOMMENDED due to large model size (~7.8GB).
    Use recognize_image_paddleocr or recognize_image_paddleocr_mcp instead.

    Args:
        image_path: Path to the image file

    Returns:
        OCR result dictionary containing:
        - text: Recognized text content
        - boxes: Bounding boxes for text regions
        - confidence: Average confidence score
        - engine: OCR engine name
        - processing_time: Processing time in seconds
        - analysis: Technical analysis (optional)
    """
    tool_name = "recognize_image_deepseek"
    logger = get_logger("tools.recognize_image_deepseek")
    try:
        logger.info(f"MCP工具调用开始: recognize_image_deepseek, 图片路径: {image_path}")

        # Recognize with timeout protection
        result = _recognize_with_engine("deepseek", image_path)

        result_dict = result.to_dict()
        _log_recognition_success(logger, tool_name, result_dict)
        return result_dict
    except TimeoutError:
        return _timeout_result(
            logger, tool_name, "deepseek", image_path,
            "DeepSeek OCR处理大图片较慢。建议：1) 使用paddleocr或easyocr 2) 压缩图片 3) 分批处理",
            "使用更快的引擎（paddleocr/easyocr）或压缩图片",
        )
    except Exception as e:
        return _failure_result(logger, tool_name, "deepseek", e)


@mcp.tool()
def recognize_image_paddleocr_mcp(image_path: str) -> dict:
    """
    Recognize text in an image using paddleocr-mcp engine (subprocess).

    Args:
        image_path: Path to the image file

    Returns:
        OCR result dictionary containing:
        - text: Recognized text content
        - boxes: Bounding boxes for text regions
        - confidence: Average confidence score
        - engine: OCR engine name
        - processing_time: Processing time in seconds
        - analysis: Technical analysis (optional)
    """
    tool_name = "recognize_image_paddleocr_mcp"
    logger = get_logger("tools.recognize_image_paddleocr_mcp")
    try:
        logger.info(f"MCP工具调用开始: recognize_image_paddleocr_mcp, 图片路径: {image_path}")

        # Recognize with timeout protection
        result = _recognize_with_engine("paddleocr_mcp", image_path)

        result_dict = result.to_dict()
        _log_recognition_success(logger, tool_name, result_dict)
        return result_dict
    except TimeoutError:
        return _timeout_result(
            logger, tool_name, "paddleocr_mcp", image_path,
            "图片可能过大或过于复杂。建议：1) 尝试压缩图片 2) 使用更快的引擎 3) 分批处理大图片",
            "尝试压缩图片或使用更快的引擎",
        )
    except Exception as e:
        return _failure_result(logger, tool_name, "paddleocr_mcp", e)


@mcp.tool()
def recognize_image_easyocr(image_path: str, languages: str = "ch_sim,en") -> dict:
    """
    Recognize text in an image using EasyOCR engine.

    EasyOCR supports 80+ languages and is easy to use. Good for multilingual scenarios.

    Args:
        image_path: Path to the image file
        languages: Comma-separated language codes (default: 'ch_sim,en' for Chinese Simplified and English).
            Common codes: 'en' (English), 'ch_sim' (Chinese Simplified), 'ch_tra' (Chinese Traditional),
            'ja' (Japanese), 'ko' (Korean), 'fr' (French), 'de' (German), etc.

    Returns:
        OCR result dictionary containing:
        - text: Recognized text content
        - boxes: Bounding boxes for text regions
        - confidence: Average confidence score
        - engine: OCR engine name
        - processing_time: Processing time in seconds
        - analysis: Technical analysis (optional)
    """
    tool_name = "recognize_image_easyocr"
    logger = get_logger("tools.recognize_image_easyocr")
    try:
        logger.info(f"MCP工具调用开始: recognize_image_easyocr, 图片路径: {image_path}, 语言: {languages}")

        # Parse the comma-separated codes, dropping blanks such as "en,,ja".
        lang_list = [lang.strip() for lang in languages.split(',') if lang.strip()]

        # Recognize with timeout protection
        result = _recognize_with_engine("easyocr", image_path, languages=lang_list)

        result_dict = result.to_dict()
        _log_recognition_success(logger, tool_name, result_dict)
        return result_dict
    except TimeoutError:
        return _timeout_result(
            logger, tool_name, "easyocr", image_path,
            "图片可能过大或过于复杂。建议：1) 尝试压缩图片 2) 减少支持的语言数量 3) 分批处理大图片",
            "压缩图片或减少支持的语言数量",
        )
    except Exception as e:
        return _failure_result(logger, tool_name, "easyocr", e)


@mcp.tool()
def get_prompt_template() -> dict:
    """
    Get general prompt template for image analysis.

    Returns:
        Dictionary with template.
    """
    logger = get_logger("tools.get_prompt_template")
    try:
        logger.info("MCP工具调用开始: get_prompt_template")
        template = get_scenario_template()
        logger.info("MCP工具调用成功: get_prompt_template")
        return {
            "template": template,
            "scenario_name": "通用模板"
        }
    except Exception as e:
        logger.error(f"MCP工具调用失败: get_prompt_template, 错误: {e}", exc_info=True)
        return {
            "error": str(e),
            "template": None
        }


def _get_usage_guide_file_path() -> Optional[Path]:
    """Get the path to the usage guide file.

    Candidate locations are probed in order and the first existing one wins:

    1. two directories above the installed ``ocr_mcp_service`` package,
    2. three directories above this module (project root in development),
    3. four directories above this module.

    Returns:
        Path to ``usage_guide.md``, or ``None`` when no candidate exists.
    """
    candidates = []

    # 1. Relative to the installed package.
    try:
        # files() returns a Traversable and is not a context manager, so it
        # must not be used in a ``with`` statement.  Only a real filesystem
        # Path offers ``.parent``; other Traversables (e.g. zip-backed
        # packages) are skipped.
        package_root = importlib.resources.files("ocr_mcp_service")
        if isinstance(package_root, Path):
            candidates.append(package_root.parent.parent / "usage_guide.md")
    except Exception:
        # Deliberately best-effort: the package may not be importable under
        # this name or the API may be missing; fall through to the others.
        pass

    module_path = Path(__file__)
    # 2. In the project root (development mode).
    candidates.append(module_path.parent.parent.parent / "usage_guide.md")
    # 3. One directory further up from the current file.
    candidates.append(module_path.parent.parent.parent.parent / "usage_guide.md")

    for candidate in candidates:
        try:
            if candidate.exists():
                return candidate
        except OSError:
            # An unreadable location simply does not count as a match.
            continue

    return None


def _load_usage_guide_from_file() -> dict:
    """Load usage guide from file and parse into sections.

    The file is split on markdown horizontal rules (lines made only of three
    or more dashes) into up to three sections.

    Returns:
        Dictionary with ``guide``, ``tips`` and ``examples`` strings; missing
        sections are empty strings.

    Raises:
        FileNotFoundError: If the guide file cannot be located or opened.
        IOError: If the file is empty or cannot be read or parsed.
    """
    guide_path = _get_usage_guide_file_path()

    if guide_path is None:
        raise FileNotFoundError(
            "无法找到使用指南文件。请确保文件已正确安装或位于usage_guide.md"
        )

    try:
        with open(guide_path, "r", encoding="utf-8") as f:
            content = f.read()

        if not content.strip():
            raise ValueError("使用指南文件为空")

        # Split on "---" separators.  maxsplit=2 keeps any further rules
        # inside the examples section instead of silently discarding the
        # text that follows them.
        parts = re.split(r'^---+$', content, maxsplit=2, flags=re.MULTILINE)

        # Pad so that a file with fewer separators yields empty sections.
        guide, tips, examples = (parts + ["", ""])[:3]
        return {
            "guide": guide.strip(),
            "tips": tips.strip(),
            "examples": examples.strip()
        }
    except FileNotFoundError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise IOError(f"无法读取使用指南文件: {e}") from e


@mcp.tool()
def health_check() -> dict:
    """
    Check MCP service health status.

    Returns service health information including loaded engines,
    usage statistics, and system status.

    Returns:
        Dictionary containing:
        - status: Service status (healthy/degraded/unhealthy)
        - engines_loaded: Number of loaded OCR engines
        - engines: List of loaded engine names
        - usage_stats: Engine usage statistics
        - timestamp: Check timestamp
    """
    logger = get_logger("tools.health_check")
    try:
        logger.info("MCP工具调用开始: health_check")

        # Get engine statistics
        stats = OCREngineFactory.get_usage_stats()

        # No engines loaded means degraded, but the service itself is running.
        status = "degraded" if stats["total_engines"] == 0 else "healthy"

        result = {
            "status": status,
            "engines_loaded": stats["total_engines"],
            "engines": stats["engines"],
            "usage_stats": stats["usage_count"],
            "timestamp": datetime.now().isoformat()
        }

        logger.info(f"MCP工具调用成功: health_check, 状态: {status}")
        return result
    except Exception as e:
        logger.error(f"MCP工具调用失败: health_check, 错误: {e}", exc_info=True)
        return {
            "status": "unhealthy",
            "error": str(e),
            "error_type": type(e).__name__,
            "timestamp": datetime.now().isoformat()
        }


@mcp.tool()
def get_usage_guide() -> dict:
    """
    Get usage guide and tips for using OCR MCP service.

    This tool provides comprehensive usage instructions, tips, and examples
    for using the OCR MCP service effectively.

    Returns:
        Dictionary containing:
        - guide: Complete usage guide
        - tips: Usage tips and best practices
        - examples: Practical usage examples
    """
    logger = get_logger("tools.get_usage_guide")
    try:
        logger.info("MCP工具调用开始: get_usage_guide")

        # Load from file (raises if the file cannot be found or read)
        guide = _load_usage_guide_from_file()

        logger.info("MCP工具调用成功: get_usage_guide")
        return guide
    except Exception as e:
        logger.error(f"MCP工具调用失败: get_usage_guide, 错误: {e}", exc_info=True)
        return {
            "error": str(e),
            "guide": None,
            "tips": None,
            "examples": None
        }

Latest Blog Posts

What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat
MCP Moves to the Linux Foundation: Neutral Stewardship for Agentic Infrastructure
By Om-Shree-0709 on December 15, 2025.
mcp
anthropic
Linux Foundation
Code Execution with MCP: Architecting Agentic Efficiency
By Om-Shree-0709 on December 14, 2025.
mcp
Token bloat

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/qiao-925/ocr-mcp-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.