OCR-MCP

ocr_sampling_handler.py•18.1 KiB

""" OCR Sampling Handler for FastMCP 2.14.3 Provides AI-powered sampling capabilities for intelligent document processing workflows, enabling autonomous orchestration of OCR operations based on document characteristics. """ import logging from dataclasses import dataclass from typing import Any logger = logging.getLogger(__name__) @dataclass class SamplingRequest: """Request structure for OCR sampling operations.""" messages: list[dict[str, Any]] tools: list[dict[str, Any]] max_tokens: int | None = None temperature: float | None = None system_prompt: str | None = None @dataclass class SamplingResult: """Result structure for OCR sampling operations.""" content: str usage: dict[str, Any] model: str finish_reason: str class OCRSamplingHandler: """ FastMCP 2.14.3 Sampling Handler for OCR operations. Enables AI-powered document processing workflows by providing sampling capabilities that allow the server to make intelligent decisions about OCR backend selection, preprocessing steps, and processing strategies. """ def __init__(self, backend_manager=None, config=None): """ Initialize the OCR sampling handler. Args: backend_manager: OCR backend manager for tool execution config: OCR configuration object """ self.backend_manager = backend_manager self.config = config self.logger = logging.getLogger(__name__) async def sample(self, request: SamplingRequest) -> SamplingResult: """ Execute sampling request for OCR operations with FastMCP 2.14.3 SEP-1577 compliance. This method enables AI-powered agentic document processing workflows: - Intelligent OCR backend selection based on document analysis - Autonomous preprocessing pipeline construction - Quality assessment and error recovery strategies - Workflow orchestration without client round-trips - Multi-document batch processing optimization Args: request: Sampling request with messages, tools, and parameters Returns: SamplingResult with AI-generated response and metadata """ try: # Extract the user's query from messages user_query = "" system_context = "" for message in request.messages: if message.get("role") == "user": user_query = message.get("content", "") elif message.get("role") == "system": system_context = message.get("content", "") # Enhanced query analysis with backend manager context analysis = await self._analyze_ocr_query_enhanced( user_query, system_context, request.tools ) # Generate intelligent response with workflow orchestration response_content = await self._generate_agentic_ocr_response( user_query, analysis, request.tools ) # Enhanced usage statistics with processing metrics usage = { "prompt_tokens": len(user_query.split()), "completion_tokens": len(response_content.split()), "total_tokens": len(user_query.split()) + len(response_content.split()), "model": "ocr-agentic-processor-v2", "processing_strategy": analysis.get("strategy", "unknown"), "backends_analyzed": len(analysis.get("recommended_backends", [])), "workflow_complexity": analysis.get("complexity_score", 0.0), } return SamplingResult( content=response_content, usage=usage, model="ocr-agentic-processor-sep1577", finish_reason="completed", ) except Exception as e: self.logger.error(f"OCR sampling failed: {e}") return SamplingResult( content=f"I apologize, but I encountered an error while processing your OCR request: {str(e)}. Please try again with a simpler query.", usage={"error": True}, model="ocr-sampling-handler-v1", finish_reason="error", ) async def _analyze_ocr_query_enhanced( self, user_query: str, system_context: str, available_tools: list[dict[str, Any]] ) -> dict[str, Any]: """ Enhanced OCR query analysis with backend manager integration. Analyzes user queries to determine optimal OCR processing strategies, backend selection, and workflow orchestration. """ query_lower = user_query.lower() analysis = { "strategy": "single_document", "recommended_backends": [], "complexity_score": 0.0, "needs_preprocessing": False, "batch_processing": False, "quality_assessment_needed": False, "layout_analysis": False, "workflow_orchestration": False, } # Detect document types and processing needs if any( word in query_lower for word in ["batch", "multiple", "folder", "directory", "all files"] ): analysis["strategy"] = "batch_processing" analysis["batch_processing"] = True analysis["complexity_score"] += 0.3 if any(word in query_lower for word in ["table", "form", "layout", "structure"]): analysis["layout_analysis"] = True analysis["complexity_score"] += 0.2 if any(word in query_lower for word in ["quality", "accuracy", "confidence", "compare"]): analysis["quality_assessment_needed"] = True analysis["complexity_score"] += 0.2 if any(word in query_lower for word in ["workflow", "pipeline", "automate", "orchestrate"]): analysis["workflow_orchestration"] = True analysis["complexity_score"] += 0.4 # Backend recommendations based on content analysis if any(word in query_lower for word in ["math", "formula", "scientific", "technical"]): analysis["recommended_backends"].extend(["deepseek-ocr", "florence-2"]) elif any(word in query_lower for word in ["table", "spreadsheet", "structured"]): analysis["recommended_backends"].extend(["florence-2", "pp-ocrv5"]) elif any(word in query_lower for word in ["multilingual", "foreign", "international"]): analysis["recommended_backends"].extend(["easyocr", "deepseek-ocr"]) else: analysis["recommended_backends"].extend(["pp-ocrv5", "tesseract"]) # Check for preprocessing needs if any( word in query_lower for word in ["noisy", "blurry", "dark", "enhance", "preprocess"] ): analysis["needs_preprocessing"] = True # Use backend manager for capability assessment if self.backend_manager: available_backends = self.backend_manager.get_available_backends() analysis["available_backends"] = available_backends analysis["recommended_backends"] = [ b for b in analysis["recommended_backends"] if b in available_backends ] return analysis async def _generate_agentic_ocr_response( self, user_query: str, analysis: dict[str, Any], available_tools: list[dict[str, Any]] ) -> str: """ Generate agentic OCR response with workflow orchestration. """ strategy = analysis.get("strategy", "single_document") if strategy == "batch_processing": return await self._generate_batch_processing_workflow(user_query, analysis) elif analysis.get("workflow_orchestration"): return await self._generate_workflow_orchestration_response(user_query, analysis) elif analysis.get("quality_assessment_needed"): return await self._generate_quality_assessment_workflow(user_query, analysis) else: return await self._generate_standard_ocr_workflow(user_query, analysis) async def _generate_batch_processing_workflow( self, user_query: str, analysis: dict[str, Any] ) -> str: """Generate intelligent batch processing workflow.""" workflow_steps = [ "Analyze document characteristics and folder structure", "Select optimal OCR backend based on content type", "Apply intelligent preprocessing if needed", "Process documents with parallel execution", "Generate comprehensive quality report", "Provide actionable recommendations for failed documents", ] response = """I'll orchestrate an intelligent batch OCR processing workflow for your documents. **Workflow Strategy:** - **Analysis**: Examine document types, quality, and optimal processing approaches - **Backend Selection**: Auto-select best OCR engines based on content characteristics - **Parallel Processing**: Process multiple documents simultaneously with smart concurrency - **Quality Assurance**: Comprehensive accuracy assessment and error recovery - **Optimization**: Continuous improvement based on results **Recommended Actions:** 1. `agentic_document_workflow(operation="process_batch_intelligent", source_path="your_folder_path")` 2. Review quality assessment report for optimization opportunities 3. Use failed document analysis for targeted improvements **Expected Benefits:** - 70-90% reduction in manual quality assessment time - Automatic backend selection for optimal accuracy - Intelligent error recovery and preprocessing - Comprehensive batch processing analytics Would you like me to proceed with analyzing your document folder?""" return response async def _generate_workflow_orchestration_response( self, user_query: str, analysis: dict[str, Any] ) -> str: """Generate workflow orchestration response.""" return """I'll create an intelligent OCR processing pipeline that orchestrates multiple tools autonomously. **SEP-1577 Agentic Workflow Features:** - **Autonomous Backend Selection**: AI chooses optimal OCR engines - **Intelligent Preprocessing**: Automatic image enhancement detection - **Quality-Gated Processing**: Only high-confidence results pass through - **Error Recovery**: Automatic retry with alternative strategies - **Workflow Optimization**: Continuous improvement based on results **Recommended Starting Point:** `agentic_document_workflow(operation="create_intelligent_pipeline", document_types=["pdf", "images"])` This will create a self-optimizing OCR pipeline that improves with each document processed.""" async def _generate_quality_assessment_workflow( self, user_query: str, analysis: dict[str, Any] ) -> str: """Generate quality assessment workflow.""" return """I'll perform comprehensive OCR quality assessment with intelligent backend comparison. **Multi-Dimensional Quality Analysis:** - **Character Accuracy**: CER/WER measurements with ground truth - **Layout Preservation**: Table/form structure integrity - **Backend Comparison**: Performance analysis across OCR engines - **Confidence Scoring**: Per-element reliability assessment - **Error Pattern Analysis**: Systematic accuracy improvement **Recommended Workflow:** 1. `document_processing(operation="compare_backends", source_path="your_document")` 2. `document_processing(operation="assess_quality", ocr_result="result_from_step1")` 3. `workflow_management(operation="create_quality_optimization_pipeline")` This provides data-driven OCR accuracy optimization.""" async def _generate_standard_ocr_workflow( self, user_query: str, analysis: dict[str, Any] ) -> str: """Generate standard OCR processing workflow.""" backends = analysis.get("recommended_backends", ["auto"]) response = f"""I'll process your document using intelligent OCR backend selection. **Recommended OCR Strategy:** - **Primary Backend**: {backends[0] if backends else "auto"} - **Fallback Options**: {", ".join(backends[1:]) if len(backends) > 1 else "tesseract, easyocr"} - **Enhancement**: Automatic preprocessing if image quality issues detected - **Quality Assurance**: Confidence scoring and accuracy validation **Processing Command:** `document_processing(operation="process_document", source_path="your_file_path", backend="auto")` **Next Steps After Processing:** - Review confidence score (target: >0.8 for high reliability) - Use layout analysis for structured content - Apply quality assessment for accuracy validation Would you like me to proceed with processing your document?""" return response async def _analyze_ocr_query(self, query: str) -> dict[str, Any]: self.logger.error(f"OCR sampling failed: {e}") return SamplingResult( content=f"I apologize, but I encountered an error while processing your OCR request: {str(e)}. Please try again with a simpler query.", usage={"error": True}, model="ocr-sampling-handler-v1", finish_reason="error", ) async def _analyze_ocr_query(self, query: str) -> dict[str, Any]: """ Analyze OCR query to determine processing strategy. Args: query: User query about OCR operations Returns: Analysis dictionary with processing recommendations """ analysis = { "document_types": [], "processing_steps": [], "backends": [], "confidence": 0.0, "complexity": "simple", } query_lower = query.lower() # Determine document types if any(word in query_lower for word in ["pdf", "document", "file"]): analysis["document_types"].append("pdf") if any(word in query_lower for word in ["image", "photo", "picture", "scan"]): analysis["document_types"].append("image") if any(word in query_lower for word in ["text", "ocr", "extract"]): analysis["processing_steps"].append("text_extraction") # Determine processing complexity if "batch" in query_lower or "multiple" in query_lower or "all" in query_lower: analysis["complexity"] = "batch" analysis["processing_steps"].append("batch_processing") elif "analyze" in query_lower or "quality" in query_lower: analysis["complexity"] = "analysis" analysis["processing_steps"].append("quality_assessment") elif "convert" in query_lower or "format" in query_lower: analysis["complexity"] = "conversion" analysis["processing_steps"].append("format_conversion") # Recommend OCR backends based on content if "math" in query_lower or "formula" in query_lower: analysis["backends"].append("deepseek-ocr") if "layout" in query_lower or "structure" in query_lower: analysis["backends"].append("florence-2") if "speed" in query_lower or "fast" in query_lower: analysis["backends"].append("pp-ocrv5") # Default backend if none specified if not analysis["backends"]: analysis["backends"].append("auto") analysis["confidence"] = 0.8 if analysis["document_types"] else 0.5 return analysis async def _generate_ocr_response( self, query: str, analysis: dict[str, Any], tools: list[dict[str, Any]] ) -> str: """ Generate intelligent OCR response based on analysis. Args: query: Original user query analysis: Query analysis results tools: Available tools for execution Returns: Intelligent response with processing recommendations """ response_parts = [] # Greeting and understanding response_parts.append("I understand you want to process documents with OCR. ") # Document type confirmation if analysis["document_types"]: doc_types = ", ".join(analysis["document_types"]) response_parts.append(f"I'll help you process {doc_types} files. ") else: response_parts.append("I'll help you with OCR processing. ") # Processing strategy if analysis["complexity"] == "batch": response_parts.append( "For batch processing, I'll use intelligent workflow orchestration to handle multiple documents efficiently. " ) elif analysis["complexity"] == "analysis": response_parts.append( "I'll perform comprehensive quality assessment and document analysis. " ) elif analysis["complexity"] == "conversion": response_parts.append( "I'll handle format conversion with optimal quality preservation. " ) # Backend recommendations if analysis["backends"] and analysis["backends"][0] != "auto": backends = ", ".join(analysis["backends"]) response_parts.append( f"Based on your requirements, I'll use {backends} for optimal results. " ) # Available tools tool_names = [tool.get("name", "") for tool in tools if tool.get("name")] if tool_names: response_parts.append( f"I have access to these OCR tools: {', '.join(tool_names[:3])}{'...' if len(tool_names) > 3 else ''}. " ) # Next steps response_parts.append( "Would you like me to proceed with the OCR processing, or do you need to specify particular files or settings?" ) return "".join(response_parts) async def get_available_models(self) -> list[str]: """ Get list of available AI models for sampling. Returns: List of model identifiers """ return ["ocr-intelligent-processor", "document-analysis-model"] async def check_health(self) -> dict[str, Any]: """ Check sampling handler health status. Returns: Health status dictionary """ return { "status": "healthy", "backend_available": self.backend_manager is not None, "config_loaded": self.config is not None, "sampling_enabled": True, }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sandraschi/ocr-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

ocr_sampling_handler.py•18.1 KiB