process_local_file
Extract text and tables from local documents using OCR, converting them into structured markdown and HTML formats for efficient data processing.
Instructions
Process a single local file from OCR_DIR.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
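| arguments | Yes | Object of tool arguments: `filename` (required), plus optional `table_format`, `extract_header`, `extract_footer`, and `include_images`. | |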
Implementation Reference
- src/mcp_mistral_ocr_opt/main.py:256-292 (handler) Tool registration and handler for process_local_file in the MCP server.
# MCP tool handler: validates the request, resolves the file under the
# configured OCR directory, runs OCR on it, and returns the result as one
# JSON-encoded text item. Every failure is reported to the client as McpError.
# NOTE(review): the docstring below appears to double as the tool description
# shown to clients — confirm before rewording it.
@app.tool("process_local_file")
async def process_local_file(arguments: Dict[str, Any]) -> List[TextContent]:
    """Process a single local file from OCR_DIR."""
    # "filename" is the only mandatory argument; a missing or empty value is
    # rejected as a caller error.
    filename = arguments.get("filename")
    if not filename:
        raise McpError(ErrorData(code=INVALID_PARAMS, message="filename is required"))

    # Resolve the name relative to config.ocr_dir. A ValueError from the
    # resolver is surfaced as INVALID_PARAMS with the resolver's own message.
    # NOTE(review): presumably the resolver rejects paths that escape the OCR
    # directory — confirm against resolve_path_in_dir.
    try:
        file_path = resolve_path_in_dir(config.ocr_dir, filename)
    except ValueError as exc:
        raise McpError(ErrorData(code=INVALID_PARAMS, message=str(exc)))

    # A missing file is reported as a parameter error, echoing the name the
    # caller supplied rather than the resolved path.
    if not file_path.exists():
        raise McpError(
            ErrorData(code=INVALID_PARAMS, message=f"File not found: {filename}")
        )

    try:
        # Optional arguments are forwarded as given: table_format falls back
        # to None and the three boolean flags fall back to False when absent.
        result = await ocr_processor.process_local_file(
            file_path=file_path,
            table_format=arguments.get("table_format"),
            extract_header=arguments.get("extract_header", False),
            extract_footer=arguments.get("extract_footer", False),
            include_images=arguments.get("include_images", False),
        )
        # The result is serialized as indented JSON; ensure_ascii=False keeps
        # non-ASCII characters unescaped in the returned text.
        return [
            TextContent(
                type="text", text=json.dumps(result, indent=2, ensure_ascii=False)
            )
        ]
    # ValueError raised inside this try block is mapped to INVALID_PARAMS.
    except ValueError as e:
        raise McpError(ErrorData(code=INVALID_PARAMS, message=str(e)))
    # Any other exception raised inside this try block is mapped to
    # INTERNAL_ERROR.
    except Exception as e:
        raise McpError(
            ErrorData(code=INTERNAL_ERROR, message=f"Error processing file: {str(e)}")
        )
- The actual OCR processing logic for local files.
async def process_local_file( self, file_path: Path, table_format: Optional[str] = None, extract_header: bool = False, extract_footer: bool = False, include_images: bool = False, ) -> Dict[str, Any]: """Process a local file using Mistral's OCR capabilities. Args: file_path: Path to local file table_format: Table formatting option (null, markdown, html) extract_header: Extract document headers extract_footer: Extract document footers include_images: Include base64 images in output Returns: Dictionary with result and metadata """ # Validate file is_valid, error_msg = validate_file_size(file_path, self.max_file_size) if not is_valid: raise ValueError(error_msg) # Get client from pool client_pool = await self._ensure_client_pool() client = await client_pool.get_client() file_type = get_file_type(file_path) if file_type is None: suffix = file_path.suffix.lower() or "unknown" raise ValueError(f"unsupported file type: {suffix}") try: # Build OCR parameters ocr_params = { "model": self.config.model, } # Add optional parameters (use defaults from config if not specified) final_table_format = table_format or self.config.default_table_format final_extract_header = extract_header or self.config.default_extract_header final_extract_footer = extract_footer or self.config.default_extract_footer final_include_images = include_images or self.config.default_include_images if final_table_format and final_table_format != "null": ocr_params["table_format"] = final_table_format if final_extract_header: ocr_params["extract_header"] = True if final_extract_footer: ocr_params["extract_footer"] = True if final_include_images: ocr_params["include_image_base64"] = True # Process based on file type if file_type == "image": # Handle image files with base64 encoding base64_image = encode_image_to_base64(file_path) if not base64_image: raise ValueError("Failed to encode image") mime_type = get_mime_type(file_path) if not mime_type: raise ValueError("Unsupported image MIME type") 
ocr_params["document"] = { "type": "image_url", "image_url": f"data:{mime_type};base64,{base64_image}", } else: # Handle PDF documents - encode to base64 base64_content = encode_image_to_base64(file_path) # Reuse for any file if not base64_content: raise ValueError("Failed to encode document")