Skip to main content
Glama
snussik
by snussik

process_local_file

Extract text and tables from local documents using OCR, converting them into structured markdown and HTML formats for efficient data processing.

Instructions

Process a single local file from OCR_DIR.

Input Schema

Table | JSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| arguments | Yes | | |

Implementation Reference

  • Tool registration and handler for process_local_file in the MCP server.
    @app.tool("process_local_file")
    async def process_local_file(arguments: Dict[str, Any]) -> List[TextContent]:
        """Process a single local file from OCR_DIR."""
        # NOTE(review): the docstring above is intentionally left untouched; it
        # appears to be surfaced to clients as the tool description -- confirm
        # before expanding it. Detailed notes therefore live in comments.
        #
        # Keys read from ``arguments``:
        #   filename        (required) name resolved against ``config.ocr_dir``
        #   table_format    (optional) forwarded as-is; ``None`` when absent
        #   extract_header  (optional) defaults to ``False``
        #   extract_footer  (optional) defaults to ``False``
        #   include_images  (optional) defaults to ``False``
        #
        # Returns a one-element list holding the processor result serialized as
        # indented, non-ASCII-escaped JSON text.
        #
        # Raises McpError with INVALID_PARAMS when the filename is missing, when
        # path resolution or the processor raises ValueError, or when the file
        # does not exist; raises McpError with INTERNAL_ERROR for anything else
        # that fails during processing or serialization.
        filename = arguments.get("filename")
        if not filename:
            raise McpError(ErrorData(code=INVALID_PARAMS, message="filename is required"))

        try:
            file_path = resolve_path_in_dir(config.ocr_dir, filename)
        except ValueError as exc:
            # Chain explicitly so the original ValueError is kept as the cause.
            raise McpError(ErrorData(code=INVALID_PARAMS, message=str(exc))) from exc

        if not file_path.exists():
            raise McpError(
                ErrorData(code=INVALID_PARAMS, message=f"File not found: {filename}")
            )

        try:
            result = await ocr_processor.process_local_file(
                file_path=file_path,
                table_format=arguments.get("table_format"),
                extract_header=arguments.get("extract_header", False),
                extract_footer=arguments.get("extract_footer", False),
                include_images=arguments.get("include_images", False),
            )

            # Serialization stays inside the try block so that a result that
            # cannot be encoded is still reported as INTERNAL_ERROR.
            return [
                TextContent(
                    type="text", text=json.dumps(result, indent=2, ensure_ascii=False)
                )
            ]
        except ValueError as exc:
            # ValueError from the processor is reported as a caller error.
            raise McpError(ErrorData(code=INVALID_PARAMS, message=str(exc))) from exc
        except Exception as exc:
            # Top-level boundary: convert any other failure into an MCP error
            # while preserving the original exception as the cause.
            raise McpError(
                ErrorData(code=INTERNAL_ERROR, message=f"Error processing file: {exc}")
            ) from exc
  • The actual OCR processing logic for local files.
    async def process_local_file(
        self,
        file_path: Path,
        table_format: Optional[str] = None,
        extract_header: bool = False,
        extract_footer: bool = False,
        include_images: bool = False,
    ) -> Dict[str, Any]:
        """Process a local file using Mistral's OCR capabilities.
    
        Args:
            file_path: Path to local file
            table_format: Table formatting option (null, markdown, html)
            extract_header: Extract document headers
            extract_footer: Extract document footers
            include_images: Include base64 images in output
    
        Returns:
            Dictionary with result and metadata
        """
        # Validate file
        is_valid, error_msg = validate_file_size(file_path, self.max_file_size)
        if not is_valid:
            raise ValueError(error_msg)
    
        # Get client from pool
        client_pool = await self._ensure_client_pool()
        client = await client_pool.get_client()
    
        file_type = get_file_type(file_path)
        if file_type is None:
            suffix = file_path.suffix.lower() or "unknown"
            raise ValueError(f"unsupported file type: {suffix}")
    
        try:
            # Build OCR parameters
            ocr_params = {
                "model": self.config.model,
            }
    
            # Add optional parameters (use defaults from config if not specified)
            final_table_format = table_format or self.config.default_table_format
            final_extract_header = extract_header or self.config.default_extract_header
            final_extract_footer = extract_footer or self.config.default_extract_footer
            final_include_images = include_images or self.config.default_include_images
    
            if final_table_format and final_table_format != "null":
                ocr_params["table_format"] = final_table_format
            if final_extract_header:
                ocr_params["extract_header"] = True
            if final_extract_footer:
                ocr_params["extract_footer"] = True
            if final_include_images:
                ocr_params["include_image_base64"] = True
    
            # Process based on file type
            if file_type == "image":
                # Handle image files with base64 encoding
                base64_image = encode_image_to_base64(file_path)
                if not base64_image:
                    raise ValueError("Failed to encode image")
    
                mime_type = get_mime_type(file_path)
                if not mime_type:
                    raise ValueError("Unsupported image MIME type")
    
                ocr_params["document"] = {
                    "type": "image_url",
                    "image_url": f"data:{mime_type};base64,{base64_image}",
                }
            else:
                # Handle PDF documents - encode to base64
                base64_content = encode_image_to_base64(file_path)  # Reuse for any file
    
                if not base64_content:
                    raise ValueError("Failed to encode document")

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/snussik/mcp_mistral_ocr_opt'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.