OCR-MCP

mock_backends.py•13.6 KiB

""" Mock OCR Backend Implementations Comprehensive mock implementations for all OCR backends to enable testing without requiring actual model downloads or GPU resources. """ import asyncio from typing import Any from src.ocr_mcp.core.backend_manager import OCRBackend from src.ocr_mcp.core.config import OCRConfig class MockDeepSeekBackend(OCRBackend): """Mock DeepSeek-OCR backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("deepseek-ocr", config) self._available = True self.call_count = 0 self.last_call_args = None def is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 self.last_call_args = (image_path, mode, kwargs) # Simulate processing delay await asyncio.sleep(0.1) result = { "success": True, "text": f"DeepSeek OCR result for {image_path} in {mode} mode", "confidence": 0.95, "backend": "deepseek-ocr", "processing_time": 0.1, "mode": mode, "gpu_used": False, } # Add mode-specific features if mode == "formatted": result["layout_analysis"] = {"paragraphs": 3, "lines": 12, "words": 45} elif mode == "fine-grained": result["regions"] = [ {"text": "Header text", "bbox": [10, 10, 200, 50]}, {"text": "Body content", "bbox": [10, 60, 400, 200]}, ] return result def get_capabilities(self) -> dict[str, Any]: return { "name": "deepseek-ocr", "available": True, "modes": ["text", "formatted", "fine-grained"], "languages": ["en", "multilingual"], "gpu_support": False, "description": "Vision-language OCR with advanced text understanding", } class MockFlorenceBackend(OCRBackend): """Mock Florence-2 backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("florence-2", config) self._available = True self.call_count = 0 def is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 await asyncio.sleep(0.15) 
result = { "success": True, "text": f"Florence-2 OCR result for {image_path}", "confidence": 0.92, "backend": "florence-2", "processing_time": 0.15, "mode": mode, "gpu_used": False, } # Florence-2 specific features if "region" in kwargs: result["region_ocr"] = True result["region_coords"] = kwargs["region"] return result def get_capabilities(self) -> dict[str, Any]: return { "name": "florence-2", "available": True, "modes": ["text", "formatted", "fine-grained"], "languages": ["en", "multilingual"], "gpu_support": False, "description": "Microsoft's Vision Foundation Model for various vision tasks", } class MockDOTSBackend(OCRBackend): """Mock DOTS.OCR backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("dots-ocr", config) self._available = True self.call_count = 0 def is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 await asyncio.sleep(0.12) result = { "success": True, "text": f"DOTS.OCR result with document structure analysis for {image_path}", "confidence": 0.94, "backend": "dots-ocr", "processing_time": 0.12, "mode": mode, "gpu_used": False, "layout_analysis": {"tables": 2, "paragraphs": 5, "headings": 3, "lists": 1}, "table_data": [{"rows": 3, "cols": 4, "content": "Sample table data"}], } return result def get_capabilities(self) -> dict[str, Any]: return { "name": "dots-ocr", "available": True, "modes": ["text", "formatted"], "languages": ["en", "multilingual"], "gpu_support": False, "description": "Document understanding specialist for layout analysis", } class MockPPOCRBackend(OCRBackend): """Mock PP-OCRv5 backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("pp-ocrv5", config) self._available = True self.call_count = 0 def is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 
await asyncio.sleep(0.08) # Fast industrial processing result = { "success": True, "text": f"PP-OCRv5 industrial OCR result for {image_path}", "confidence": 0.96, "backend": "pp-ocrv5", "processing_time": 0.08, "mode": mode, "gpu_used": True, # Industrial version supports GPU "industrial_metrics": { "inference_speed": "45 FPS", "accuracy": "96.2%", "edge_deployment": True, }, } return result def get_capabilities(self) -> dict[str, Any]: return { "name": "pp-ocrv5", "available": True, "modes": ["text", "formatted"], "languages": ["en", "zh", "multilingual"], "gpu_support": True, "description": "Industrial-grade PaddlePaddle OCR system", } class MockQwenBackend(OCRBackend): """Mock Qwen-Image-Layered backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("qwen-image-layered", config) self._available = True self.call_count = 0 def is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 await asyncio.sleep(0.2) result = { "success": True, "text": f"Qwen-Image-Layered decomposition result for {image_path}", "confidence": 0.93, "backend": "qwen-image-layered", "processing_time": 0.2, "mode": mode, "gpu_used": True, "layers": { "count": 4, "types": ["background", "text", "graphics", "foreground"], "editability_score": 0.89, }, "decomposition": { "rgba_layers": 4, "semantic_separation": True, "independent_manipulation": True, }, } return result def get_capabilities(self) -> dict[str, Any]: return { "name": "qwen-image-layered", "available": True, "modes": ["text", "formatted", "fine-grained"], "languages": ["en", "multilingual"], "gpu_support": True, "description": "Image decomposition model for layered editing", } class MockGOTBackend(OCRBackend): """Mock GOT-OCR2.0 backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("got-ocr", config) self._available = True self.call_count = 0 def is_available(self) -> bool: 
return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 await asyncio.sleep(0.18) result = { "success": True, "text": f"GOT-OCR2.0 advanced OCR result for {image_path}", "confidence": 0.97, "backend": "got-ocr", "processing_time": 0.18, "mode": mode, "gpu_used": True, "advanced_features": { "multi_modal": True, "layout_understanding": True, "text_line_detection": True, }, } # GOT-specific features if kwargs.get("comic_mode"): result["comic_analysis"] = { "panels_detected": 4, "speech_bubbles": 3, "reading_order": [0, 1, 2, 3], } return result def get_capabilities(self) -> dict[str, Any]: return { "name": "got-ocr", "available": True, "modes": ["text", "formatted", "fine-grained"], "languages": ["en", "multilingual"], "gpu_support": True, "description": "General OCR Theory implementation with advanced features", } class MockTesseractBackend(OCRBackend): """Mock Tesseract backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("tesseract", config) self._available = True self.call_count = 0 def is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 await asyncio.sleep(0.05) # Fast traditional OCR result = { "success": True, "text": f"Tesseract OCR result for {image_path}", "confidence": 0.85, "backend": "tesseract", "processing_time": 0.05, "mode": mode, "gpu_used": False, "tesseract_version": "5.3.0", } return result def get_capabilities(self) -> dict[str, Any]: return { "name": "tesseract", "available": True, "modes": ["text"], "languages": ["en", "multilingual"], "gpu_support": False, "description": "Traditional Tesseract OCR engine", } class MockEasyOCRBackend(OCRBackend): """Mock EasyOCR backend for testing.""" def __init__(self, config: OCRConfig): super().__init__("easyocr", config) self._available = True self.call_count = 0 def 
is_available(self) -> bool: return self._available async def process_image(self, image_path: str, mode: str = "text", **kwargs) -> dict[str, Any]: self.call_count += 1 await asyncio.sleep(0.1) result = { "success": True, "text": f"EasyOCR result for {image_path}", "confidence": 0.88, "backend": "easyocr", "processing_time": 0.1, "mode": mode, "gpu_used": True, "detection_boxes": [ [[10, 10], [100, 10], [100, 30], [10, 30]], [[10, 40], [150, 40], [150, 60], [10, 60]], ], } return result def get_capabilities(self) -> dict[str, Any]: return { "name": "easyocr", "available": True, "modes": ["text", "formatted"], "languages": ["en", "ch_sim", "ch_tra"], "gpu_support": True, "description": "EasyOCR with bounding box detection", } # Factory functions for creating mock backends def create_mock_deepseek_backend(config: OCRConfig) -> MockDeepSeekBackend: """Factory for DeepSeek backend mock.""" return MockDeepSeekBackend(config) def create_mock_florence_backend(config: OCRConfig) -> MockFlorenceBackend: """Factory for Florence-2 backend mock.""" return MockFlorenceBackend(config) def create_mock_dots_backend(config: OCRConfig) -> MockDOTSBackend: """Factory for DOTS.OCR backend mock.""" return MockDOTSBackend(config) def create_mock_ppocr_backend(config: OCRConfig) -> MockPPOCRBackend: """Factory for PP-OCRv5 backend mock.""" return MockPPOCRBackend(config) def create_mock_qwen_backend(config: OCRConfig) -> MockQwenBackend: """Factory for Qwen-Image-Layered backend mock.""" return MockQwenBackend(config) def create_mock_got_backend(config: OCRConfig) -> MockGOTBackend: """Factory for GOT-OCR2.0 backend mock.""" return MockGOTBackend(config) def create_mock_tesseract_backend(config: OCRConfig) -> MockTesseractBackend: """Factory for Tesseract backend mock.""" return MockTesseractBackend(config) def create_mock_easyocr_backend(config: OCRConfig) -> MockEasyOCRBackend: """Factory for EasyOCR backend mock.""" return MockEasyOCRBackend(config) # Utility functions for testing def 
create_mock_image(width: int = 100, height: int = 100, color: str = "white") -> Any: """Create a mock PIL Image for testing.""" from PIL import Image return Image.new("RGB", (width, height), color=color) def create_mock_ocr_result( success: bool = True, text: str = "Mock OCR text", backend: str = "test-backend", confidence: float = 0.9, ) -> dict[str, Any]: """Create a mock OCR result for testing.""" return { "success": success, "text": text, "confidence": confidence, "backend": backend, "processing_time": 0.1, "mode": "text", } def create_mock_scanner_info( device_id: str = "wia:test_scanner", name: str = "Test Scanner" ) -> dict[str, Any]: """Create mock scanner information for testing.""" return { "device_id": device_id, "name": name, "manufacturer": "Test Manufacturer", "description": f"Mock scanner: {name}", "device_type": "Flatbed", "supports_adf": True, "supports_duplex": False, "max_dpi": 600, }

Loading blob content...

Latest Blog Posts

MCP isn't dead–it's maturing
By punkpeye on January 20, 2026.
mcp
Google's AI Overview Has Been Sending Me the Wrong Customers for 6 Months
By punkpeye on January 20, 2026.
google
ai
startups
Expose Your Local MCP Server to the Internet
By punkpeye on January 19, 2026.
MCP Inspector
mcp
tutorial

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sandraschi/ocr-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

mock_backends.py•13.6 KiB