Skip to main content
Glama
snussik
by snussik

process_url_file

Extract text and tables from documents at URLs into structured markdown and HTML formats using optimized OCR processing.

Instructions

Process a file from a URL.

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| arguments | Yes | | |

Implementation Reference

  • MCP tool registration and handler function for "process_url_file". Validates input arguments and calls the underlying ocr_processor logic.
    @app.tool("process_url_file")
    async def process_url_file(arguments: Dict[str, Any]) -> List[TextContent]:
        """Process a file from a URL.

        Validates the tool arguments, delegates the work to
        ``ocr_processor.process_url_file`` and returns the result serialised
        as indented JSON.

        Args:
            arguments: Tool arguments. ``url`` and ``file_type`` are required
                (``file_type`` must be ``"image"`` or ``"pdf"``).
                ``table_format``, ``extract_header``, ``extract_footer`` and
                ``include_images`` are optional and forwarded as given.

        Returns:
            A single-element list containing a ``TextContent`` whose text is
            the JSON-encoded processing result.

        Raises:
            McpError: With ``INVALID_PARAMS`` when a required argument is
                missing or ``file_type`` is unsupported; with
                ``INTERNAL_ERROR`` when processing or serialisation fails.
        """
        url = arguments.get("url")
        file_type = arguments.get("file_type")

        # Validate before touching the processor so callers get a precise
        # INVALID_PARAMS error instead of a generic internal failure.
        if not url:
            raise McpError(ErrorData(code=INVALID_PARAMS, message="url is required"))
        if not file_type:
            raise McpError(ErrorData(code=INVALID_PARAMS, message="file_type is required"))

        if file_type not in ["image", "pdf"]:
            raise McpError(
                ErrorData(
                    code=INVALID_PARAMS, message="file_type must be either 'image' or 'pdf'"
                )
            )

        try:
            result = await ocr_processor.process_url_file(
                url=url,
                file_type=file_type,
                table_format=arguments.get("table_format"),
                extract_header=arguments.get("extract_header", False),
                extract_footer=arguments.get("extract_footer", False),
                include_images=arguments.get("include_images", False),
            )

            return [
                TextContent(
                    type="text", text=json.dumps(result, indent=2, ensure_ascii=False)
                )
            ]
        except McpError:
            # Already a protocol-level error: keep its original code and
            # message rather than re-wrapping it as INTERNAL_ERROR.
            raise
        except Exception as e:
            # Chain explicitly so the underlying traceback stays attached.
            raise McpError(
                ErrorData(code=INTERNAL_ERROR, message=f"Error processing URL: {str(e)}")
            ) from e
  • The core implementation of the OCR processing for files provided via URL using the Mistral SDK.
    async def process_url_file(
        self,
        url: str,
        file_type: str,
        table_format: Optional[str] = None,
        extract_header: bool = False,
        extract_footer: bool = False,
        include_images: bool = False,
    ) -> Dict[str, Any]:
        """Process a file from a URL using Mistral's OCR capabilities.
    
        Args:
            url: URL of the file to process
            file_type: Type of file: 'image' or 'pdf'
            table_format: Table formatting option (null, markdown, html)
            extract_header: Extract document headers
            extract_footer: Extract document footers
            include_images: Include base64 images in output
    
        Returns:
            Dictionary with result and metadata
        """
        if file_type not in ["image", "pdf"]:
            raise ValueError("file_type must be either 'image' or 'pdf'")
    
        # Get client from pool
        client_pool = await self._ensure_client_pool()
        client = await client_pool.get_client()
    
        try:
            # Build OCR parameters
            ocr_params = {
                "model": self.config.model,
                "document": {
                    "type": "image_url" if file_type == "image" else "document_url",
                    f"{'image' if file_type == 'image' else 'document'}_url": url,
                },
            }
    
            # Add optional parameters (use defaults from config if not specified)
            final_table_format = table_format or self.config.default_table_format
            final_extract_header = extract_header or self.config.default_extract_header
            final_extract_footer = extract_footer or self.config.default_extract_footer
            final_include_images = include_images or self.config.default_include_images
    
            if final_table_format and final_table_format != "null":
                ocr_params["table_format"] = final_table_format
            if final_extract_header:
                ocr_params["extract_header"] = True
            if final_extract_footer:
                ocr_params["extract_footer"] = True
            if final_include_images:
                ocr_params["include_image_base64"] = True
    
            # Process the document
            response = await asyncio.to_thread(client.ocr.process, **ocr_params)
    
            # Convert response to JSON
            result = json.loads(self._process_response(response))
    
            # Extract filename from URL
            parsed_url = urlparse(url)
            source_name = Path(parsed_url.path).stem or "url_document"
    
            # Save result to output directory
            result_path = self._save_result(result, source_name)
    
            # Add metadata to result
            result["_metadata"] = {
                "source_url": url,
                "output_file": str(result_path),

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/snussik/mcp_mistral_ocr_opt'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.