MD-PDF MCP Server

Overview Schema Related Servers Score Discussions

md-pdf-mcp
tools

pdf_tools.py•8.14 KiB

#!/usr/bin/env python3
"""
MCP tools for PDF to Markdown conversion.
"""

import os
from pathlib import Path
from typing import List, Optional, Tuple

from server import mcp
from utils.converters import pdf_to_markdown, resolve_path

# Maximum number of characters echoed back in the tool responses.
_PREVIEW_CHARS = 500
_SAMPLE_CHARS = 200


def _truncate(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, with "..." appended if it was cut."""
    return text[:limit] + "..." if len(text) > limit else text


def _locate_pdf(
    working_dir: str,
    pdf_file_path: str
) -> Tuple[Optional[Path], Optional[str]]:
    """
    Validate the working directory and locate a PDF inside it.

    Args:
        working_dir: Absolute path to the working directory
        pdf_file_path: Path to the PDF file, joined onto working_dir

    Returns:
        (path, None) when every check passes, otherwise (None, error message)
    """
    working_path = Path(working_dir)
    if not working_path.is_absolute():
        return None, f"Error: working_dir must be an absolute path, got: {working_dir}"
    if not working_path.exists():
        return None, f"Error: working_dir does not exist: {working_dir}"
    if not working_path.is_dir():
        return None, f"Error: working_dir is not a directory: {working_dir}"

    # NOTE(review): an absolute pdf_file_path (or one containing "..") makes the
    # joined path point outside working_dir. Confirm whether that should be rejected.
    input_path = working_path / pdf_file_path
    if not input_path.exists():
        return None, f"Error: Input file '{pdf_file_path}' does not exist at: {input_path}"
    if input_path.suffix.lower() != '.pdf':
        return None, f"Error: '{pdf_file_path}' is not a PDF file. Please provide a .pdf file."

    return input_path, None


def _parse_page_numbers(page_numbers: str, total_pages: int) -> List[int]:
    """
    Turn a page specification into 0-based page indices.

    Args:
        page_numbers: "all", or comma-separated 1-based entries such as
            "1,3,5" or "1-5" (single pages and ranges may be mixed)
        total_pages: Number of pages in the document

    Returns:
        Indices in the order given. Pages outside 1..total_pages are dropped
        and blank entries (e.g. from a trailing comma) are ignored.

    Raises:
        ValueError: If an entry is neither a number nor a "start-end" range
    """
    spec = page_numbers.strip()
    if spec.lower() == "all":
        return list(range(total_pages))

    indices: List[int] = []
    for part in spec.split(','):
        part = part.strip()
        if not part:
            continue

        first, dash, last = part.partition('-')
        try:
            start = int(first)
            end = int(last) if dash else start
        except ValueError:
            raise ValueError(
                f"invalid page specification '{part}'; expected \"all\", "
                "a page number like \"3\", or a range like \"1-5\""
            ) from None

        # Clamp the lower bound to page 1: a start of 0 or less would otherwise
        # become a negative index and silently select pages from the end.
        indices.extend(range(max(start, 1) - 1, min(end, total_pages)))

    return indices


@mcp.tool()
def convert_pdf_to_markdown(
    pdf_file_path: str,
    output_filename: Optional[str] = None
) -> str:
    """
    Convert a PDF file to Markdown format.

    Args:
        pdf_file_path: Path to the PDF file to convert
        output_filename: Name of the output Markdown file (if not provided, uses same name as input with .md extension)

    Returns:
        A message indicating success or failure of the conversion
    """
    try:
        # Resolve input path (handles both relative and absolute paths)
        input_path = resolve_path(pdf_file_path)

        if not input_path.exists():
            return f"Error: Input file '{pdf_file_path}' does not exist at resolved path: {input_path}"

        if input_path.suffix.lower() != '.pdf':
            return f"Error: '{pdf_file_path}' is not a PDF file. Please provide a .pdf file."

        # Set output filename if not provided; an empty string counts as
        # "not provided" so it does not turn into a file literally named ".md".
        if not output_filename:
            output_filename = input_path.stem + '.md'
        elif not output_filename.lower().endswith('.md'):
            output_filename += '.md'

        # Resolve output path (handles both relative and absolute paths)
        output_path = resolve_path(output_filename)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Convert PDF to Markdown
        success, message = pdf_to_markdown(
            pdf_path=str(input_path),
            output_path=str(output_path)
        )

        if not success:
            return f"Conversion failed: {message}"

        file_size = output_path.stat().st_size if output_path.exists() else 0
        summary = (
            f"{message}\nInput: {pdf_file_path} (resolved to: {input_path})"
            f"\nFile size: {file_size:,} bytes"
        )

        # The preview is best-effort: if the output cannot be read back as
        # UTF-8 text, still report the successful conversion without it.
        try:
            with open(output_path, 'r', encoding='utf-8') as f:
                # One character beyond the limit is enough to detect truncation.
                content = f.read(_PREVIEW_CHARS + 1)
        except (OSError, UnicodeError):
            return summary

        preview = _truncate(content, _PREVIEW_CHARS)
        return f"{summary}\n\nContent preview:\n{preview}"

    except Exception as e:
        return f"Error during PDF to Markdown conversion: {e}"


@mcp.tool()
def extract_text_from_pdf(
    working_dir: str,
    pdf_file_path: str,
    page_numbers: str = "all"
) -> str:
    """
    Extract text content from a PDF file without conversion to Markdown.

    Args:
        working_dir: Absolute path to the working directory for file operations
        pdf_file_path: Path to the PDF file relative to working_dir
        page_numbers: Page numbers to extract - "all" for all pages, or comma-separated numbers like "1,3,5" or "1-5"

    Returns:
        The extracted text content from the PDF
    """
    try:
        import pdfplumber

        input_path, error = _locate_pdf(working_dir, pdf_file_path)
        if error is not None:
            return error

        extracted_text = []

        with pdfplumber.open(input_path) as pdf:
            pages = pdf.pages

            # Extract text from specified pages
            for page_idx in _parse_page_numbers(page_numbers, len(pages)):
                text = pages[page_idx].extract_text()
                if text:
                    extracted_text.append(f"=== Page {page_idx + 1} ===\n{text}\n")

        if not extracted_text:
            return f"No text content found in the specified pages of '{pdf_file_path}' (at: {input_path})"

        full_text = "\n".join(extracted_text)
        # Count the pages that actually produced text, not the pages requested.
        return f"Successfully extracted text from {len(extracted_text)} page(s) of '{pdf_file_path}' (at: {input_path}):\n\n{full_text}"

    except Exception as e:
        return f"Error during text extraction: {e}"


@mcp.tool()
def get_pdf_info(
    working_dir: str,
    pdf_file_path: str
) -> str:
    """
    Get information about a PDF file (number of pages, metadata, etc.).

    Args:
        working_dir: Absolute path to the working directory for file operations
        pdf_file_path: Path to the PDF file relative to working_dir

    Returns:
        Information about the PDF file
    """
    try:
        import pdfplumber

        input_path, error = _locate_pdf(working_dir, pdf_file_path)
        if error is not None:
            return error

        info_lines = [
            f"PDF File: {pdf_file_path} (at: {input_path})",
            f"File Size: {input_path.stat().st_size:,} bytes",
        ]

        with pdfplumber.open(input_path) as pdf:
            pages = pdf.pages
            info_lines.append(f"Number of Pages: {len(pages)}")

            # Get metadata if available, skipping entries with empty values
            metadata = pdf.metadata
            if metadata:
                info_lines.append("\nMetadata:")
                info_lines.extend(
                    f"  {key}: {value}" for key, value in metadata.items() if value
                )

            # Get page dimensions for first page
            if pages:
                first_page = pages[0]
                info_lines.append("\nPage Dimensions (first page):")
                info_lines.append(f"  Width: {first_page.width:.1f} points")
                info_lines.append(f"  Height: {first_page.height:.1f} points")

                # Try to get some text from first page as sample
                sample_text = first_page.extract_text()
                if sample_text:
                    preview = _truncate(sample_text, _SAMPLE_CHARS)
                    info_lines.append(f"\nSample text from first page:\n{preview}")

        return "\n".join(info_lines)

    except Exception as e:
        return f"Error getting PDF info: {e}"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kareemaly/md-pdf-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

pdf_tools.py•8.14 KiB