Skip to main content
Glama
snussik
by snussik

process_batch_local_files

Process multiple local files concurrently to extract text and tables into structured markdown and HTML formats using optimized OCR processing.

Instructions

Process multiple local files concurrently.

Input Schema

Table / JSON Schema

Name      | Required | Description | Default
arguments | Yes      | —           | —

Implementation Reference

  • The `process_batch_local_files` handler is implemented in `src/mcp_mistral_ocr_opt/main.py` using `@app.tool("process_batch_local_files")`. It handles file discovery, determines whether to use inline/file batch mode or concurrent processing based on file count, and performs the OCR task.
    @app.tool("process_batch_local_files")
    async def process_batch_local_files(arguments: Dict[str, Any]) -> List[TextContent]:
        """Process multiple local files concurrently or as a Mistral batch job.

        Args (inside ``arguments``):
            patterns (list, required): glob patterns resolved against
                ``config.ocr_dir`` by ``discover_files``.
            max_files (optional): cap on the number of discovered files.
            table_format, extract_header, extract_footer, include_images
                (optional): OCR options forwarded to the processor.

        Returns:
            A single ``TextContent`` holding a JSON summary. For batch modes
            ("inline"/"file") the summary carries the submitted ``job_id``;
            for concurrent mode it carries per-file results and counts.

        Raises:
            McpError: ``INVALID_PARAMS`` for bad input or validation errors,
                ``INTERNAL_ERROR`` for any other failure.
        """
        patterns = arguments.get("patterns")
        if not patterns or not isinstance(patterns, list):
            raise McpError(
                ErrorData(code=INVALID_PARAMS, message="patterns array is required")
            )

        try:
            files = await discover_files(
                directory=config.ocr_dir,
                patterns=patterns,
                max_files=arguments.get("max_files"),
            )

            if not files:
                return [
                    TextContent(
                        type="text",
                        text=json.dumps(
                            {"message": "No files found matching patterns"}, indent=2
                        ),
                    )
                ]

            # OCR options are identical for every processing path; build once.
            ocr_kwargs = dict(
                table_format=arguments.get("table_format"),
                extract_header=arguments.get("extract_header", False),
                extract_footer=arguments.get("extract_footer", False),
                include_images=arguments.get("include_images", False),
            )

            mode = config.select_processing_mode(len(files))
            if mode in {"inline", "file"}:
                # Submit as a Mistral batch job; import lazily so the tool
                # only needs the SDK when batch mode is actually selected.
                from mistralai import Mistral

                client = Mistral(api_key=config.api_key)
                batch_proc = BatchProcessor(client=client, config=config)

                if mode == "inline":
                    requests = await batch_proc.prepare_inline_batch(
                        files=files, **ocr_kwargs
                    )
                    job_id = await batch_proc.process_inline_batch(requests)
                else:
                    batch_file_id = await batch_proc.prepare_file_batch(
                        files=files, **ocr_kwargs
                    )
                    job_id = await batch_proc.process_file_batch(batch_file_id)

                # `mode` is exactly the batch_type label ("inline" or "file"),
                # so one result dict serves both branches.
                result = {
                    "mode": "batch",
                    "batch_type": mode,
                    "job_id": job_id,
                    "files_processed": len(files),
                    "message": f"Batch job created with {len(files)} files. Use check_batch_status to monitor progress.",
                }
            else:
                # Concurrent in-process OCR.
                results = await ocr_processor.process_batch_local_files(
                    file_paths=files, **ocr_kwargs
                )

                # A per-file result dict containing an "error" key marks failure.
                successful = sum(1 for r in results if "error" not in r)

                result = {
                    "mode": "concurrent",
                    "files_processed": len(files),
                    "successful": successful,
                    "failed": len(results) - successful,
                    "results": results,
                }

            return [
                TextContent(
                    type="text", text=json.dumps(result, indent=2, ensure_ascii=False)
                )
            ]

        except ValueError as e:
            # Helper validation failures surface as parameter errors;
            # chain the cause so the original traceback is preserved.
            raise McpError(ErrorData(code=INVALID_PARAMS, message=str(e))) from e
        except Exception as e:
            raise McpError(
                ErrorData(code=INTERNAL_ERROR, message=f"Error processing batch: {str(e)}")
            ) from e

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/snussik/mcp_mistral_ocr_opt'

If you have feedback or need assistance with the MCP directory API, please join our Discord server