process_batch_local_files
Process multiple local files concurrently to extract text and tables into structured markdown and HTML formats using optimized OCR processing.
Instructions
Process multiple local files concurrently.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| arguments | Yes | Object with a required `patterns` array (glob patterns) and optional `max_files`, `table_format`, `extract_header`, `extract_footer`, `include_images` keys | — |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| result | Yes | JSON text describing either the created batch job (mode, batch type, job ID, file count) or the concurrent processing results | — |
Implementation Reference
- src/mcp_mistral_ocr_opt/main.py:295-398 (handler) — The `process_batch_local_files` handler is implemented in `src/mcp_mistral_ocr_opt/main.py` using `@app.tool("process_batch_local_files")`. It handles file discovery, determines whether to use inline/file batch mode or concurrent processing based on file count, and performs the OCR task.
@app.tool("process_batch_local_files") async def process_batch_local_files(arguments: Dict[str, Any]) -> List[TextContent]: """Process multiple local files concurrently.""" patterns = arguments.get("patterns") if not patterns or not isinstance(patterns, list): raise McpError( ErrorData(code=INVALID_PARAMS, message="patterns array is required") ) try: files = await discover_files( directory=config.ocr_dir, patterns=patterns, max_files=arguments.get("max_files"), ) if not files: return [ TextContent( type="text", text=json.dumps( {"message": "No files found matching patterns"}, indent=2 ), ) ] mode = config.select_processing_mode(len(files)) if mode in {"inline", "file"}: # Use batch processing from mistralai import Mistral client = Mistral(api_key=config.api_key) batch_proc = BatchProcessor(client=client, config=config) if mode == "inline": # Inline batch requests = await batch_proc.prepare_inline_batch( files=files, table_format=arguments.get("table_format"), extract_header=arguments.get("extract_header", False), extract_footer=arguments.get("extract_footer", False), include_images=arguments.get("include_images", False), ) job_id = await batch_proc.process_inline_batch(requests) result = { "mode": "batch", "batch_type": "inline", "job_id": job_id, "files_processed": len(files), "message": f"Batch job created with {len(files)} files. Use check_batch_status to monitor progress.", } else: # File batch batch_file_id = await batch_proc.prepare_file_batch( files=files, table_format=arguments.get("table_format"), extract_header=arguments.get("extract_header", False), extract_footer=arguments.get("extract_footer", False), include_images=arguments.get("include_images", False), ) job_id = await batch_proc.process_file_batch(batch_file_id) result = { "mode": "batch", "batch_type": "file", "job_id": job_id, "files_processed": len(files), "message": f"Batch job created with {len(files)} files. 
Use check_batch_status to monitor progress.", } else: # Use concurrent processing results = await ocr_processor.process_batch_local_files( file_paths=files, table_format=arguments.get("table_format"), extract_header=arguments.get("extract_header", False), extract_footer=arguments.get("extract_footer", False), include_images=arguments.get("include_images", False), ) successful = sum(1 for r in results if "error" not in r) failed = len(results) - successful result = { "mode": "concurrent", "files_processed": len(files), "successful": successful, "failed": failed, "results": results, } return [ TextContent( type="text", text=json.dumps(result, indent=2, ensure_ascii=False) ) ] except ValueError as e: raise McpError(ErrorData(code=INVALID_PARAMS, message=str(e))) except Exception as e: raise McpError( ErrorData(code=INTERNAL_ERROR, message=f"Error processing batch: {str(e)}") )