convert_pdf_file
Convert PDF files to Markdown format using OCR technology, preserving document structure and extracting images for easier editing and content reuse.
Instructions
Convert a local PDF file to Markdown. Output is saved in a new folder named after the PDF in its original directory.
Args:
file_path: Path to a local PDF file or multiple paths separated by spaces, commas, or newlines.
Returns:
A dictionary with the conversion results.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Path to a local PDF file or multiple paths separated by spaces, commas, or newlines. | |
Implementation Reference
- src/pdf2md/server.py:145-208 (handler) The primary handler function for the 'convert_pdf_file' MCP tool. It processes one or more local PDF files, uses Mistral AI OCR to convert them to Markdown, saves the output files and images in a directory next to the original PDF, and returns the results. Registered via the @mcp.tool() decorator.
def _convert_single_pdf(client, path_str: str) -> Dict[str, Any]:
    """Convert one local PDF to Markdown and return its result entry.

    The PDF is base64-encoded, sent to the OCR endpoint of ``client``, and the
    response is written to ``<pdf folder>/<pdf stem>/<pdf stem>.md`` through
    ``save_ocr_response_to_markdown_and_images``.

    Args:
        client: Object exposing ``ocr.process(...)`` (the Mistral client
            created by the caller).
        path_str: Path of the PDF exactly as supplied by the user.

    Returns:
        A dict that always holds ``file_path`` and ``success``. On success it
        also holds ``markdown_file``, ``images``, ``output_directory`` and
        ``content_length``; on failure it holds ``error``.
    """
    try:
        input_path = Path(path_str)
        # is_file() instead of exists(): a directory that happens to be named
        # "*.pdf" is rejected here, before an output folder is created for it.
        if not input_path.is_file() or not input_path.name.lower().endswith('.pdf'):
            return {"file_path": path_str, "success": False, "error": "File does not exist or is not a PDF."}

        # Create a new directory for output next to the original file
        output_dir = input_path.parent / input_path.stem
        output_dir.mkdir(parents=True, exist_ok=True)
        output_md_path = output_dir / f"{input_path.stem}.md"

        with open(input_path, "rb") as pdf_file:
            base64_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')

        # The whole document travels inline as a base64 data URL.
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={"type": "document_url", "document_url": f"data:application/pdf;base64,{base64_pdf}"},
            include_image_base64=True
        )

        markdown_content, saved_images = save_ocr_response_to_markdown_and_images(
            ocr_response, output_md_path, output_dir
        )
        # The saver signals failure by returning None for the markdown text.
        if markdown_content is None:
            return {"file_path": path_str, "success": False, "error": "Could not save markdown or images."}

        return {
            "file_path": path_str,
            "success": True,
            "markdown_file": str(output_md_path),
            "images": saved_images,
            "output_directory": str(output_dir),
            "content_length": len(markdown_content)
        }
    except Exception as e:
        # Per-file boundary: one failing PDF must not abort the remaining ones.
        return {"file_path": path_str, "success": False, "error": f"Error processing file '{path_str}': {e}"}


@mcp.tool()
async def convert_pdf_file(file_path: str) -> Dict[str, Any]:
    """
    Convert a local PDF file to Markdown. Output is saved in a new folder named after the PDF in its original directory.

    Args:
        file_path: Path to a local PDF file or multiple paths separated by spaces, commas, or newlines.

    Returns:
        A dictionary with the conversion results.
    """
    # NOTE(review): the docstring wording is kept unchanged because it
    # presumably doubles as the tool description shown to MCP clients.
    #
    # Result shape: {"success": bool, "results": [per-file dict, ...]} where
    # "success" is True when at least one file converted. Early failures
    # (missing API key, client construction error, no paths given) return
    # {"success": False, "error": "..."} instead.
    import asyncio  # local import: only this handler needs it

    if not MISTRAL_API_KEY:
        return {"success": False, "error": "Missing API key, please set environment variable MISTRAL_API_KEY"}

    try:
        client = Mistral(api_key=MISTRAL_API_KEY)
    except Exception as e:
        return {"success": False, "error": f"Error initializing Mistral client: {e}"}

    file_paths = parse_input_string(file_path)
    if not file_paths:
        # Blank input used to produce a bare failure with no explanation.
        return {"success": False, "error": "No file paths provided.", "results": []}

    results = []
    for path_str in file_paths:
        # File reading and the OCR request are blocking calls; running them in
        # a worker thread keeps the event loop responsive during conversion.
        results.append(await asyncio.to_thread(_convert_single_pdf, client, path_str))

    return {"success": any(r.get("success", False) for r in results), "results": results}


# - src/pdf2md/server.py:61-75 (helper) Helper utility to parse the input file_path string into a list of individual paths, handling quotes, commas, spaces, and newlines.
def parse_input_string(input_string: str) -> List[str]:
    """Split a string of paths or URLs separated by spaces, commas, or newlines.

    Commas and any run of whitespace act as separators. A matching pair of
    single or double quotes around the whole input, or around an individual
    item, is removed. Quotes do not protect separators, so an item that
    contains a space or comma is still split.

    Args:
        input_string: Raw user input holding one or more paths/URLs.

    Returns:
        The non-empty items in their original order.
    """
    quote_chars = ('"', "'")

    # Strip a pair of quotes wrapping the whole input, but only when that
    # quote character does not occur again inside. Otherwise the outer
    # characters belong to separately quoted items (e.g. '"a.pdf", "b.pdf"')
    # and removing them would leave stray quotes glued to the paths.
    for quote in quote_chars:
        wrapped = input_string.startswith(quote) and input_string.endswith(quote)
        if wrapped and quote not in input_string[1:-1]:
            input_string = input_string[1:-1]
            break

    cleaned_items = []
    for item in input_string.replace(",", " ").split():
        # Items produced by split() are never empty, so item[0] is safe.
        if item.startswith(quote_chars) and item[0] == item[-1]:
            item = item[1:-1]
        if item:
            cleaned_items.append(item)
    return cleaned_items


# - src/pdf2md/server.py:40-60 (helper) Helper function that saves the OCR response's markdown content to a file and extracts/saves embedded images to disk.
def save_ocr_response_to_markdown_and_images(ocr_response, output_md_path, output_dir_for_images):
    """
    Write the OCR result to a Markdown file and store the images of each page.

    The ``markdown`` of every entry in ``ocr_response.pages`` is written to
    ``output_md_path`` in page order, with nothing inserted between pages.
    Each entry of a page's ``images`` is handed to ``save_image`` together
    with ``output_dir_for_images``.

    Returns:
        ``(markdown, image_paths)`` on success, where ``markdown`` is the
        concatenated page text and ``image_paths`` holds the truthy values
        returned by ``save_image``. If any exception is raised, a message is
        printed and ``(None, [])`` is returned.
    """
    markdown_parts = []
    image_paths = []
    try:
        with open(output_md_path, "wt", encoding='utf-8') as md_file:
            for page in ocr_response.pages:
                page_text = page.markdown
                md_file.write(page_text)
                markdown_parts.append(page_text)
                # filter(None, ...) drops falsy results from save_image, so
                # only images that produced a path are reported.
                image_paths.extend(
                    filter(None, (save_image(img, output_dir_for_images) for img in page.images))
                )
        combined_markdown = "".join(markdown_parts)
        return combined_markdown, image_paths
    except Exception as exc:
        print(f"Error saving markdown file '{output_md_path}' or processing images: {exc}")
        return None, []