Skip to main content
Glama

mcp-pandoc

by vivekVells
"""mcp-pandoc server module.""" import os import mcp.server.stdio import mcp.types as types import pypandoc import yaml from mcp.server import NotificationOptions, Server from mcp.server.models import InitializationOptions server = Server("mcp-pandoc") @server.list_tools() async def handle_list_tools() -> list[types.Tool]: """List available tools. Each tool specifies its arguments using JSON Schema validation. """ return [ types.Tool( name="convert-contents", description=( "Converts content between different formats. Transforms input content from any supported format " "into the specified output format.\n\n" "🚨 CRITICAL REQUIREMENTS - PLEASE READ:\n" "1. PDF Conversion:\n" " * You MUST install TeX Live BEFORE attempting PDF conversion:\n" " * Ubuntu/Debian: `sudo apt-get install texlive-xetex`\n" " * macOS: `brew install texlive`\n" " * Windows: Install MiKTeX or TeX Live from https://miktex.org/ or https://tug.org/texlive/\n" " * PDF conversion will FAIL without this installation\n\n" "2. File Paths - EXPLICIT REQUIREMENTS:\n" " * When asked to save or convert to a file, you MUST provide:\n" " - Complete directory path\n" " - Filename\n" " - File extension\n" " * Example request: 'Write a story and save as PDF'\n" " * You MUST specify: '/path/to/story.pdf' or 'C:\\Documents\\story.pdf'\n" " * The tool will NOT automatically generate filenames or extensions\n\n" "3. File Location After Conversion:\n" " * After successful conversion, the tool will display the exact path where the file is saved\n" " * Look for message: 'Content successfully converted and saved to: [file_path]'\n" " * You can find your converted file at the specified location\n" " * If no path is specified, files may be saved in system temp directory (/tmp/ on Unix systems)\n" " * For better control, always provide explicit output file paths\n\n" "Supported formats:" "- Basic: txt, html, markdown, ipynb, odt" "- Advanced (REQUIRE complete file paths): pdf, docx, rst, latex, epub" "✅ CORRECT Usage Examples:\n" "1. 'Convert this text to HTML' (basic conversion)\n" " - Tool will show converted content\n\n" "2. 'Save this text as PDF at /documents/story.pdf'\n" " - Correct: specifies path + filename + extension\n" " - Tool will show: 'Content successfully converted and saved to: /documents/story.pdf'\n\n" "❌ INCORRECT Usage Examples:\n" "1. 'Save this as PDF in /documents/'\n" " - Missing filename and extension\n" "2. 'Convert to PDF'\n" " - Missing complete file path\n\n" "When requesting conversion, ALWAYS specify:\n" "1. The content or input file\n" "2. The desired output format\n" "3. For advanced formats: complete output path + filename + extension\n" "Example: 'Convert this markdown to PDF and save as /path/to/output.pdf'\n\n" "🎨 DOCX STYLING (NEW FEATURE):\n" "4. Custom DOCX Styling with Reference Documents:\n" " * Use reference_doc parameter to apply professional styling to DOCX output\n" " * Create custom templates with your branding, fonts, and formatting\n" " * Perfect for corporate reports, academic papers, and professional documents\n" " * Example: 'Convert this report to DOCX using /templates/corporate-style.docx as reference " "and save as /reports/Q4-report.docx'\n\n" "🎯 PANDOC FILTERS (NEW FEATURE):\n" "5. Pandoc Filter Support:\n" " * Use filters parameter to apply custom Pandoc filters during conversion\n" " * Filters are Python scripts that modify document content during processing\n" " * Perfect for Mermaid diagram conversion, custom styling, and content transformation\n" " * Example: 'Convert this markdown with mermaid diagrams to DOCX using " "filters=[\"./filters/mermaid-to-png-vibrant.py\"] and save as /reports/diagram-report.docx'\n\n" "📋 Creating Reference Documents:\n" " * Generate template: pandoc -o template.docx --print-default-data-file reference.docx\n" " * Customize in Word/LibreOffice: fonts, colors, headers, margins\n" " * Use for consistent branding across all documents\n\n" "📋 Filter Requirements:\n" " * Filters must be executable Python scripts\n" " * Use absolute paths or paths relative to current working directory\n" " * Filters are applied in the order specified\n" " * Common filters: mermaid conversion, color processing, table formatting\n\n" "📄 Defaults File Support (NEW FEATURE):\n" "7. Pandoc Defaults File Support:\n" " * Use defaults_file parameter to specify a YAML configuration file\n" " * Similar to using pandoc -d option in the command line\n" " * Allows setting multiple options in a single file\n" " * Options in the defaults file can include filters, reference-doc, and other Pandoc options\n" " * Example: 'Convert this markdown to DOCX using defaults_file=\"/path/to/defaults.yaml\" " "and save as /reports/report.docx'\n\n" "Note: After conversion, always check the success message for the exact file location." ), inputSchema={ "type": "object", "properties": { "contents": { "type": "string", "description": "The content to be converted (required if input_file not provided)" }, "input_file": { "type": "string", "description": ( "Complete path to input file including filename and extension " "(e.g., '/path/to/input.md')" ) }, "input_format": { "type": "string", "description": "Source format of the content (defaults to markdown)", "default": "markdown", "enum": ["markdown", "html", "pdf", "docx", "rst", "latex", "epub", "txt", "ipynb", "odt"] }, "output_format": { "type": "string", "description": "Desired output format (defaults to markdown)", "default": "markdown", "enum": ["markdown", "html", "pdf", "docx", "rst", "latex", "epub", "txt", "ipynb", "odt"] }, "output_file": { "type": "string", "description": ( "Complete path where to save the output including filename and extension " "(required for pdf, docx, rst, latex, epub formats)" ) }, "reference_doc": { "type": "string", "description": ( "Path to a reference document to use for styling " "(supported for docx output format)" ) }, "filters": { "type": "array", "items": {"type": "string"}, "description": ( "List of Pandoc filter paths to apply during conversion. " "Filters are applied in the order specified." ) }, "defaults_file": { "type": "string", "description": ( "Path to a Pandoc defaults file (YAML) containing conversion options. " "Similar to using pandoc -d option." ) } }, "additionalProperties": False }, ) ] @server.call_tool() async def handle_call_tool( name: str, arguments: dict | None ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]: """Handle tool execution requests. Tools can modify server state and notify clients of changes. """ if name not in ["convert-contents"]: raise ValueError(f"Unknown tool: {name}") print(arguments) if not arguments: raise ValueError("Missing arguments") # Extract all possible arguments contents = arguments.get("contents") input_file = arguments.get("input_file") output_file = arguments.get("output_file") output_format = arguments.get("output_format", "markdown").lower() input_format = arguments.get("input_format", "markdown").lower() reference_doc = arguments.get("reference_doc") filters = arguments.get("filters", []) defaults_file = arguments.get("defaults_file") # Validate input parameters if not contents and not input_file: raise ValueError("Either 'contents' or 'input_file' must be provided") # Validate reference_doc if provided if reference_doc: if output_format != "docx": raise ValueError("reference_doc parameter is only supported for docx output format") if not os.path.exists(reference_doc): raise ValueError(f"Reference document not found: {reference_doc}") # Validate defaults_file if provided if defaults_file: if not os.path.exists(defaults_file): raise ValueError(f"Defaults file not found: {defaults_file}") # Check if it's a valid YAML file and readable try: with open(defaults_file) as f: yaml_content = yaml.safe_load(f) # Validate the YAML structure if not isinstance(yaml_content, dict): raise ValueError(f"Invalid defaults file format: {defaults_file} - must be a YAML dictionary") # Check if the defaults file specifies an output format that conflicts with the requested format if 'to' in yaml_content and yaml_content['to'] != output_format: print( f"Warning: Defaults file specifies output format '{yaml_content['to']}' " f"but requested format is '{output_format}'. Using requested format." ) except yaml.YAMLError as e: raise ValueError(f"Error parsing defaults file {defaults_file}: {str(e)}") from e except PermissionError as e: raise ValueError(f"Permission denied when reading defaults file: {defaults_file}") from e except Exception as e: raise ValueError(f"Error reading defaults file {defaults_file}: {str(e)}") from e # Define supported formats supported_formats = {'html', 'markdown', 'pdf', 'docx', 'rst', 'latex', 'epub', 'txt', 'ipynb', 'odt'} if output_format not in supported_formats: raise ValueError( f"Unsupported output format: '{output_format}'. Supported formats are: {', '.join(supported_formats)}" ) # Validate output_file requirement for advanced formats advanced_formats = {'pdf', 'docx', 'rst', 'latex', 'epub'} if output_format in advanced_formats and not output_file: raise ValueError(f"output_file path is required for {output_format} format") # Validate filters if provided if filters: if not isinstance(filters, list): raise ValueError("filters parameter must be an array of strings") for filter_path in filters: if not isinstance(filter_path, str): raise ValueError("Each filter must be a string path") def resolve_filter_path(filter_path, defaults_file=None): """Resolve a filter path by trying multiple possible locations. Args: ---- filter_path: The original filter path (absolute or relative) defaults_file: Optional path to the defaults file for context Returns: ------- Resolved absolute path to the filter if found, or None if not found """ # If it's already an absolute path, just use it if os.path.isabs(filter_path): paths = [filter_path] else: # Try multiple locations for relative paths paths = [ # 1. Relative to current working directory os.path.abspath(filter_path), # 2. Relative to the defaults file directory (if provided) os.path.join(os.path.dirname(os.path.abspath(defaults_file)), filter_path) if defaults_file else None, # 3. Relative to the .pandoc/filters directory os.path.join(os.path.expanduser("~"), ".pandoc", "filters", os.path.basename(filter_path)) ] # Remove None entries paths = [p for p in paths if p] # Try each path for path in paths: if os.path.exists(path): # Check if executable and try to make it executable if not if not os.access(path, os.X_OK): try: os.chmod(path, os.stat(path).st_mode | 0o111) print(f"Made filter executable: {path}") except Exception as e: print(f"Warning: Could not make filter executable: {path} - {str(e)}") continue print(f"Using filter: {path}") return path return None def validate_filters(filters, defaults_file=None): """Validate filter paths and ensure they exist and are executable.""" validated_filters = [] for filter_path in filters: resolved_path = resolve_filter_path(filter_path, defaults_file) if resolved_path: validated_filters.append(resolved_path) else: raise ValueError(f"Filter not found in any of the searched locations: {filter_path}") return validated_filters def format_result_info(filters=None, defaults_file=None, validated_filters=None): """Format filter and defaults file information for result messages.""" filter_info = "" defaults_info = "" if filters and validated_filters: filter_names = [os.path.basename(f) for f in validated_filters] filter_info = f" with filters: {', '.join(filter_names)}" if defaults_file: defaults_basename = os.path.basename(defaults_file) defaults_info = f" using defaults file: {defaults_basename}" return filter_info, defaults_info try: # Prepare conversion arguments extra_args = [] # Add defaults file if provided - ensure it's properly formatted for Pandoc if defaults_file: # Make sure the path is absolute defaults_file_abs = os.path.abspath(defaults_file) extra_args.extend(["--defaults", defaults_file_abs]) # Set environment variables for filters env = os.environ.copy() if output_file: # Set PANDOC_OUTPUT_DIR to the directory of the output file output_dir = os.path.dirname(os.path.abspath(output_file)) env["PANDOC_OUTPUT_DIR"] = output_dir else: output_dir = None # Validate filters once and reuse the result validated_filters = validate_filters(filters, defaults_file) if filters else [] # Handle filter arguments for filter_path in validated_filters: extra_args.extend(["--filter", filter_path]) # Handle PDF-specific conversion if needed if output_format == "pdf": extra_args.extend([ "--pdf-engine=xelatex", "-V", "geometry:margin=1in" ]) # Handle reference doc for docx format if reference_doc and output_format == "docx": extra_args.extend([ "--reference-doc", reference_doc ]) # No special processing needed for content # Convert content using pypandoc if input_file: if not os.path.exists(input_file): raise ValueError(f"Input file not found: {input_file}") if output_file: # Convert file to file converted_output = pypandoc.convert_file( input_file, output_format, outputfile=output_file, extra_args=extra_args ) # Create result message with filter and defaults information filter_info, defaults_info = format_result_info(filters, defaults_file, validated_filters) result_message = f"File successfully converted{filter_info}{defaults_info} and saved to: {output_file}" else: # Convert file to string converted_output = pypandoc.convert_file( input_file, output_format, extra_args=extra_args ) else: # No special processing needed for content if output_file: # Convert content to file pypandoc.convert_text( contents, output_format, format=input_format, outputfile=output_file, extra_args=extra_args ) # Create result message with filter and defaults information filter_info, defaults_info = format_result_info(filters, defaults_file, validated_filters) result_message = ( f"Content successfully converted{filter_info}{defaults_info} and saved to: {output_file}" ) else: # Convert content to string converted_output = pypandoc.convert_text( contents, output_format, format=input_format, extra_args=extra_args ) if output_file: notify_with_result = result_message else: if not converted_output: raise ValueError("Conversion resulted in empty output") # Add filter and defaults information to the notification filter_info, defaults_info = format_result_info(filters, defaults_file, validated_filters) # Adjust format for inline display if filter_info: filter_info = f" (with filters: {', '.join([os.path.basename(f) for f in validated_filters])})" if defaults_info: defaults_info = f" (using defaults file: {os.path.basename(defaults_file)})" notify_with_result = ( f'Following are the converted contents in {output_format} format{filter_info}{defaults_info}.\n' f'Ask user if they expect to save this file. If so, provide the output_file parameter with ' f'complete path.\n' f'Converted Contents:\n\n{converted_output}' ) return [ types.TextContent( type="text", text=notify_with_result ) ] except Exception as e: # Handle Pandoc conversion errors error_prefix = "Error converting" error_details = str(e) if "Filter not found" in error_details or "Filter is not executable" in error_details: error_prefix = "Filter error during conversion" elif "defaults" in error_details and defaults_file: error_prefix = "Defaults file error during conversion" # Add more context about the defaults file error_details += f" (defaults file: {defaults_file})" elif "pandoc" in error_details.lower() and "not found" in error_details.lower(): error_prefix = "Pandoc executable not found" error_details = "Please ensure Pandoc is installed and available in your PATH" error_msg = ( f"{error_prefix} {'file' if input_file else 'contents'} from {input_format} to " f"{output_format}: {error_details}" ) raise ValueError(error_msg) from e async def main(): """Run the mcp-pandoc server using stdin/stdout streams.""" async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): await server.run( read_stream, write_stream, InitializationOptions( server_name="mcp-pandoc", server_version="0.8.1", # Universal MCP compatibility & SDK upgrade capabilities=server.get_capabilities( notification_options=NotificationOptions(), experimental_capabilities={}, ), ), )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vivekVells/mcp-pandoc'

If you have feedback or need assistance with the MCP directory API, please join our Discord server