OpenZIM MCP Server

Overview Schema Related Servers Score Discussions

structure_tools.py•10.4 KiB

"""Article structure and content analysis tools for OpenZIM MCP server.""" import logging from typing import TYPE_CHECKING, Optional from ..constants import INPUT_LIMIT_ENTRY_PATH, INPUT_LIMIT_FILE_PATH from ..exceptions import OpenZimMcpRateLimitError from ..security import sanitize_input if TYPE_CHECKING: from ..server import OpenZimMcpServer logger = logging.getLogger(__name__) def register_structure_tools(server: "OpenZimMcpServer") -> None: """ Register article structure and content analysis tools. Args: server: The OpenZimMcpServer instance to register tools on """ @server.mcp.tool() async def get_article_structure(zim_file_path: str, entry_path: str) -> str: """Extract article structure including headings, sections, and key metadata. Args: zim_file_path: Path to the ZIM file entry_path: Entry path, e.g., 'C/Some_Article' Returns: JSON string containing article structure """ try: # Check rate limit try: server.rate_limiter.check_rate_limit("get_structure") except OpenZimMcpRateLimitError as e: return server._create_enhanced_error_message( operation="get article structure", error=e, context=f"Entry: {entry_path}", ) # Sanitize inputs zim_file_path = sanitize_input(zim_file_path, INPUT_LIMIT_FILE_PATH) entry_path = sanitize_input(entry_path, INPUT_LIMIT_ENTRY_PATH) # Use async operations return await server.async_zim_operations.get_article_structure( zim_file_path, entry_path ) except Exception as e: logger.error(f"Error getting article structure: {e}") return server._create_enhanced_error_message( operation="get article structure", error=e, context=f"File: {zim_file_path}, Entry: {entry_path}", ) @server.mcp.tool() async def extract_article_links(zim_file_path: str, entry_path: str) -> str: """Extract internal and external links from an article. 
Args: zim_file_path: Path to the ZIM file entry_path: Entry path, e.g., 'C/Some_Article' Returns: JSON string containing extracted links """ try: # Check rate limit try: server.rate_limiter.check_rate_limit("get_structure") except OpenZimMcpRateLimitError as e: return server._create_enhanced_error_message( operation="extract article links", error=e, context=f"Entry: {entry_path}", ) # Sanitize inputs zim_file_path = sanitize_input(zim_file_path, INPUT_LIMIT_FILE_PATH) entry_path = sanitize_input(entry_path, INPUT_LIMIT_ENTRY_PATH) # Use async operations return await server.async_zim_operations.extract_article_links( zim_file_path, entry_path ) except Exception as e: logger.error(f"Error extracting article links: {e}") return server._create_enhanced_error_message( operation="extract article links", error=e, context=f"File: {zim_file_path}, Entry: {entry_path}", ) @server.mcp.tool() async def get_entry_summary( zim_file_path: str, entry_path: str, max_words: int = 200, ) -> str: """Get a concise summary of an article without returning the full content. This tool extracts the opening paragraph(s) or introduction section, providing a quick overview of the article content. Useful for getting context without loading full articles. 
Args: zim_file_path: Path to the ZIM file entry_path: Entry path, e.g., 'C/Some_Article' max_words: Maximum number of words in the summary (default: 200, max: 1000) Returns: JSON string containing: - title: Article title - path: Entry path - summary: Extracted summary text - word_count: Number of words in summary - is_truncated: Whether the summary was truncated Examples: - Quick overview: get_entry_summary("/path/to/wiki.zim", "Biology") - Longer summary: get_entry_summary(..., "Evolution", max_words=500) """ try: # Check rate limit try: server.rate_limiter.check_rate_limit("get_entry") except OpenZimMcpRateLimitError as e: return server._create_enhanced_error_message( operation="get entry summary", error=e, context=f"Entry: {entry_path}", ) # Sanitize inputs zim_file_path = sanitize_input(zim_file_path, INPUT_LIMIT_FILE_PATH) entry_path = sanitize_input(entry_path, INPUT_LIMIT_ENTRY_PATH) # Use async operations return await server.async_zim_operations.get_entry_summary( zim_file_path, entry_path, max_words ) except Exception as e: logger.error(f"Error getting entry summary: {e}") return server._create_enhanced_error_message( operation="get entry summary", error=e, context=f"File: {zim_file_path}, Entry: {entry_path}", ) @server.mcp.tool() async def get_table_of_contents( zim_file_path: str, entry_path: str, ) -> str: """Extract a hierarchical table of contents from an article. Returns a structured TOC tree based on heading levels (h1-h6), suitable for navigation and content overview. 
Args: zim_file_path: Path to the ZIM file entry_path: Entry path, e.g., 'C/Some_Article' Returns: JSON string containing: - title: Article title - path: Entry path - toc: Hierarchical list of headings with children - heading_count: Total number of headings - max_depth: Deepest heading level used Each TOC entry contains: - level: Heading level (1-6) - text: Heading text - id: Anchor ID for linking - children: Nested subheadings Examples: - Get TOC: get_table_of_contents("/path/to/wiki.zim", "Biology") """ try: # Check rate limit try: server.rate_limiter.check_rate_limit("get_structure") except OpenZimMcpRateLimitError as e: return server._create_enhanced_error_message( operation="get table of contents", error=e, context=f"Entry: {entry_path}", ) # Sanitize inputs zim_file_path = sanitize_input(zim_file_path, INPUT_LIMIT_FILE_PATH) entry_path = sanitize_input(entry_path, INPUT_LIMIT_ENTRY_PATH) # Use async operations return await server.async_zim_operations.get_table_of_contents( zim_file_path, entry_path ) except Exception as e: logger.error(f"Error getting table of contents: {e}") return server._create_enhanced_error_message( operation="get table of contents", error=e, context=f"File: {zim_file_path}, Entry: {entry_path}", ) @server.mcp.tool() async def get_binary_entry( zim_file_path: str, entry_path: str, max_size_bytes: Optional[int] = None, include_data: bool = True, ) -> str: """Retrieve binary content from a ZIM entry. This tool returns raw binary content encoded in base64, enabling integration with external tools for processing embedded media like PDFs, videos, and images. Args: zim_file_path: Path to the ZIM file entry_path: Entry path, e.g., 'I/image.png' or 'C/document.pdf' max_size_bytes: Maximum size of content to return (default: 10MB). Content larger than this will return metadata only. include_data: If True (default), include base64-encoded data. Set to False to retrieve metadata only without the binary data. 
Returns: JSON string containing: - path: Entry path in ZIM file - title: Entry title - mime_type: Content type (e.g., "application/pdf", "image/png") - size: Size in bytes - size_human: Human-readable size (e.g., "1.5 MB") - encoding: "base64" when data is included, null otherwise - data: Base64-encoded content (if include_data=True and under size limit) - truncated: Boolean indicating if content exceeded size limit Examples: - Get a PDF: get_binary_entry("/path/file.zim", "I/document.pdf") - Get image metadata: get_binary_entry(..., "I/logo.png", False) - Large video: get_binary_entry(..., "I/video.mp4", 100000000) """ try: # Check rate limit (binary is most expensive) try: server.rate_limiter.check_rate_limit("get_binary_entry") except OpenZimMcpRateLimitError as e: return server._create_enhanced_error_message( operation="retrieve binary entry", error=e, context=f"Entry: {entry_path}", ) # Sanitize inputs zim_file_path = sanitize_input(zim_file_path, INPUT_LIMIT_FILE_PATH) entry_path = sanitize_input(entry_path, INPUT_LIMIT_ENTRY_PATH) # Use async operations return await server.async_zim_operations.get_binary_entry( zim_file_path, entry_path, max_size_bytes, include_data ) except Exception as e: logger.error(f"Error retrieving binary entry: {e}") return server._create_enhanced_error_message( operation="retrieve binary entry", error=e, context=f"File: {zim_file_path}, Entry: {entry_path}", )

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cameronrye/openzim-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

structure_tools.py•10.4 KiB