process_batch_local_files
Process multiple local files concurrently to extract text and tables into structured markdown and HTML formats using optimized OCR processing.
Instructions
Process multiple local files concurrently.
Input Schema
JSON Schema

| Name | Required | Description | Default |
|---|---|---|---|
| arguments | Yes | Tool arguments object. `patterns` (array of glob patterns) is required; optional keys: `max_files`, `table_format`, `extract_header` (default `false`), `extract_footer` (default `false`), `include_images` (default `false`). | — |
Implementation Reference
- src/mcp_mistral_ocr_opt/main.py:295-398 (handler) — The `process_batch_local_files` handler is implemented in `src/mcp_mistral_ocr_opt/main.py` using `@app.tool("process_batch_local_files")`. It handles file discovery, determines whether to use inline/file batch mode or concurrent processing based on file count, and performs the OCR task.
@app.tool("process_batch_local_files")
async def process_batch_local_files(arguments: Dict[str, Any]) -> List[TextContent]:
    """Process multiple local files matching glob patterns with OCR.

    Discovers files under ``config.ocr_dir``, then either submits them as a
    Mistral batch job (inline or file mode, chosen by
    ``config.select_processing_mode``) or processes them concurrently.

    Args:
        arguments: Tool arguments. ``patterns`` (list of glob patterns) is
            required. Optional: ``max_files``, ``table_format``,
            ``extract_header``, ``extract_footer``, ``include_images``.

    Returns:
        A single-element list with a JSON ``TextContent`` describing either
        the created batch job or the per-file concurrent results.

    Raises:
        McpError: ``INVALID_PARAMS`` for a missing/invalid ``patterns`` array
            or a ``ValueError`` from processing; ``INTERNAL_ERROR`` for any
            other failure.
    """
    patterns = arguments.get("patterns")
    if not patterns or not isinstance(patterns, list):
        raise McpError(
            ErrorData(code=INVALID_PARAMS, message="patterns array is required")
        )
    try:
        files = await discover_files(
            directory=config.ocr_dir,
            patterns=patterns,
            max_files=arguments.get("max_files"),
        )
        if not files:
            return [
                TextContent(
                    type="text",
                    text=json.dumps(
                        {"message": "No files found matching patterns"}, indent=2
                    ),
                )
            ]

        # OCR options are identical for every processing path; build once
        # instead of repeating the five kwargs in each branch.
        ocr_options = {
            "table_format": arguments.get("table_format"),
            "extract_header": arguments.get("extract_header", False),
            "extract_footer": arguments.get("extract_footer", False),
            "include_images": arguments.get("include_images", False),
        }

        mode = config.select_processing_mode(len(files))
        if mode in {"inline", "file"}:
            # Batch processing via the Mistral API; import deferred so the
            # dependency is only required when a batch path is taken.
            from mistralai import Mistral

            client = Mistral(api_key=config.api_key)
            batch_proc = BatchProcessor(client=client, config=config)
            if mode == "inline":
                requests = await batch_proc.prepare_inline_batch(
                    files=files, **ocr_options
                )
                job_id = await batch_proc.process_inline_batch(requests)
                batch_type = "inline"
            else:
                batch_file_id = await batch_proc.prepare_file_batch(
                    files=files, **ocr_options
                )
                job_id = await batch_proc.process_file_batch(batch_file_id)
                batch_type = "file"
            result = {
                "mode": "batch",
                "batch_type": batch_type,
                "job_id": job_id,
                "files_processed": len(files),
                # NOTE: the original file-batch message contained a raw line
                # break inside the string literal; both branches now share the
                # same single-line message.
                "message": (
                    f"Batch job created with {len(files)} files. "
                    "Use check_batch_status to monitor progress."
                ),
            }
        else:
            # Concurrent per-file processing; each result dict carries an
            # "error" key on failure.
            results = await ocr_processor.process_batch_local_files(
                file_paths=files, **ocr_options
            )
            successful = sum(1 for r in results if "error" not in r)
            result = {
                "mode": "concurrent",
                "files_processed": len(files),
                "successful": successful,
                "failed": len(results) - successful,
                "results": results,
            }
        return [
            TextContent(
                type="text", text=json.dumps(result, indent=2, ensure_ascii=False)
            )
        ]
    except ValueError as e:
        raise McpError(ErrorData(code=INVALID_PARAMS, message=str(e)))
    except Exception as e:
        raise McpError(
            ErrorData(code=INTERNAL_ERROR, message=f"Error processing batch: {str(e)}")
        )