Skip to main content
Glama
mcp_ocr_server.py6.56 kB
""" OCR MCP服务器 - 基于PaddleOCR的图片文字识别服务 使用FastMCP框架实现,支持通过MCP协议为AI Agent提供OCR能力。 """ import logging import sys from pathlib import Path from typing import Any from mcp.server.fastmcp import FastMCP from paddleocr import PaddleOCR # 配置日志 log_dir = Path(__file__).parent / "logs" log_dir.mkdir(exist_ok=True) log_file = log_dir / "mcp_ocr_server.log" logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler(log_file, encoding='utf-8'), logging.StreamHandler(sys.stderr) ] ) logger = logging.getLogger(__name__) logger.info(f"日志文件位置: {log_file}") # 创建FastMCP服务器实例 mcp = FastMCP("OCR Service") # 全局OCR实例(延迟初始化) _ocr_instance: PaddleOCR | None = None def get_ocr_instance() -> PaddleOCR: """获取OCR实例(单例模式,延迟初始化)""" global _ocr_instance if _ocr_instance is None: logger.info("初始化PaddleOCR...") _ocr_instance = PaddleOCR(use_angle_cls=True, lang='ch') logger.info("PaddleOCR初始化完成") return _ocr_instance @mcp.tool() def recognize_text_from_path(image_path: str) -> dict[str, Any]: """ 从图片文件路径识别文字 Args: image_path: 图片文件的路径(绝对路径或相对路径) Returns: 包含识别结果的字典: - text: 识别出的完整文本 - lines: 按行组织的识别结果列表,每行包含文本、置信度和位置信息 - success: 是否成功 - error: 错误信息(如果有) """ try: # 验证文件路径 path = Path(image_path) if not path.exists(): error_msg = f"文件不存在: {image_path}" logger.error(error_msg) return { "success": False, "error": error_msg, "text": "", "lines": [] } if not path.is_file(): error_msg = f"路径不是文件: {image_path}" logger.error(error_msg) return { "success": False, "error": error_msg, "text": "", "lines": [] } logger.info(f"开始OCR识别: {image_path}") # 获取OCR实例并执行识别 ocr = get_ocr_instance() result = ocr.ocr(str(path)) # 调试:记录返回结果的结构 logger.info(f"OCR返回结果类型: {type(result)}") if result is not None: logger.info(f"result是否为None: False") if isinstance(result, (list, tuple)): logger.info(f"result是列表/元组,长度: {len(result)}") if len(result) > 0: logger.info(f"result[0]类型: {type(result[0])}") if isinstance(result[0], (list, tuple)): logger.info(f"result[0]是列表/元组,长度: {len(result[0])}") if len(result[0]) > 0: logger.info(f"result[0][0]类型: {type(result[0][0])}, 内容: {str(result[0][0])[:200]}") elif isinstance(result[0], str): logger.warning(f"result[0]是字符串,内容: {result[0][:200]}") elif isinstance(result, str): logger.warning(f"result是字符串,内容: {result[:200]}") else: logger.warning("OCR返回结果为None") # 处理识别结果 texts = [] lines = [] # 检查result格式 if result is None: logger.warning("OCR返回结果为None") elif isinstance(result, str): logger.warning(f"OCR返回结果是字符串而不是列表: {result[:200]}") elif not isinstance(result, (list, tuple)) or len(result) == 0: logger.warning(f"OCR返回结果格式异常: {type(result)}, 长度: {len(result) if hasattr(result, '__len__') else 'N/A'}") elif isinstance(result[0], str): logger.warning(f"result[0]是字符串: {result[0][:200]}") elif result and result[0]: for line in result[0]: if line: # 如果line是字符串,说明数据格式不对 if isinstance(line, str): logger.warning(f"line是字符串而不是列表: {line[:100]}") continue for word_info in line: try: # PaddleOCR返回格式: [[坐标], (文本, 置信度)] if (len(word_info) >= 2 and isinstance(word_info[1], (list, tuple)) and len(word_info[1]) >= 2): text = word_info[1][0] # 识别的文本 confidence = word_info[1][1] # 置信度 bbox = word_info[0] # 边界框坐标 texts.append(text) lines.append({ "text": text, "confidence": round(confidence, 4), "bbox": [[round(p[0]), round(p[1])] for p in bbox] if bbox else [] }) else: logger.warning(f"意外的word_info格式: {word_info}") except (IndexError, TypeError) as e: logger.warning(f"解析word_info时出错: {word_info}, 错误: {e}") continue full_text = "\n".join(texts) logger.info(f"OCR识别完成,共识别 {len(lines)} 个文本块") return { "success": True, "text": full_text, "lines": lines, "line_count": len(lines) } except Exception as e: logger.error(f"OCR识别失败: {str(e)}", exc_info=True) return { "success": False, "error": str(e), "text": "", "lines": [] } if __name__ == "__main__": # 运行MCP服务器(使用stdio传输) mcp.run()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/qiao-925/ocr-mcp-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server