OpenZIM MCP Server

Overview Schema Related Servers Score Discussions

openzim-mcp
openzim_mcp

simple_tools.py•17.5 kB

""" Simple tools implementation for OpenZIM MCP server. This module provides intelligent, natural language-based tools that abstract away the complexity of multiple specialized tools. Designed for LLMs with limited tool-calling capabilities or context windows. """ import logging import re from typing import Any, Dict, Optional, Tuple from .zim_operations import ZimOperations logger = logging.getLogger(__name__) class IntentParser: """Parse natural language queries to determine user intent.""" # Intent patterns with priority (checked in order) INTENT_PATTERNS = [ # File listing intents ( r"\b(list|show|what|available|get)\s+(files?|zim|archives?)\b", "list_files", ), # Metadata intents (more specific patterns) ( r"\b(metadata|info|details?)\s+(for|about|of)\s+(file|zim|archive)\b", "metadata", ), ( r"\b(metadata|info|details?)\s+(for|about|of)\s+this\s+(file|zim|archive)\b", "metadata", ), (r"\b(metadata|info|details?)\s+for\b", "metadata"), (r"\binfo\s+about\b", "metadata"), (r"\bdetails?\s+of\b", "metadata"), # Main page intents (r"\b(main|home|start)\s+page\b", "main_page"), # Namespace listing intents (r"\b(list|show|what)\s+namespaces?\b", "list_namespaces"), # Browse intents (r"\b(browse|explore|show|list)\s+(namespace|articles?|entries)\b", "browse"), # Article structure intents (r"\b(structure|outline|sections?|headings?)\s+(of|for)?\b", "structure"), # Links extraction intents (r"\b(links?|references?|related)\s+(in|from|to)\b", "links"), # Suggestions intents (more specific) (r"\b(suggestions?|autocomplete|complete|hints?)\s+(for|of)\b", "suggestions"), (r"\b(suggest|autocomplete|complete)\b", "suggestions"), # Filtered search intents ( r"\b(search|find|look)\s+.+\s+(in|within)\s+(namespace|type)\b", "filtered_search", ), # Get article intents (specific) (r"\b(get|show|read|display|fetch)\s+(article|entry|page)\b", "get_article"), # Search intents (general) (r"\b(search|find|look\s+for|query)\b", "search"), ] @classmethod def parse_intent(cls, query: str) 
-> Tuple[str, Dict[str, Any]]: """ Parse a natural language query to determine intent. Args: query: Natural language query string Returns: Tuple of (intent_type, extracted_params) """ query_lower = query.lower() # Check each pattern in priority order for pattern, intent in cls.INTENT_PATTERNS: if re.search(pattern, query_lower, re.IGNORECASE): params = cls._extract_params(query, intent) return intent, params # Default to search if no specific intent detected return "search", {"query": query} @classmethod def _extract_params(cls, query: str, intent: str) -> Dict[str, Any]: """ Extract parameters from query based on intent. Args: query: Original query string intent: Detected intent type Returns: Dictionary of extracted parameters """ params: Dict[str, Any] = {} if intent == "browse": # Extract namespace from query namespace_match = re.search( r"namespace\s+['\"]?([A-Za-z0-9_.-]+)['\"]?", query, re.IGNORECASE ) if namespace_match: params["namespace"] = namespace_match.group(1) elif intent == "filtered_search": # Extract search query and filters # Try to extract the search term search_match = re.search( r"(?:search|find|look)\s+(?:for\s+)?['\"]?([^'\"]+?)['\"]?\s+(?:in|within)", query, re.IGNORECASE, ) if search_match: params["query"] = search_match.group(1).strip() # Extract namespace filter namespace_match = re.search( r"namespace\s+['\"]?([A-Za-z0-9_.-]+)['\"]?", query, re.IGNORECASE ) if namespace_match: params["namespace"] = namespace_match.group(1) # Extract content type filter type_match = re.search( r"type\s+['\"]?([A-Za-z0-9_/.-]+)['\"]?", query, re.IGNORECASE ) if type_match: params["content_type"] = type_match.group(1) elif intent in ["get_article", "structure", "links"]: # Extract article/entry path # Try to find quoted strings first quoted_match = re.search(r"['\"]([^'\"]+)['\"]", query) if quoted_match: params["entry_path"] = quoted_match.group(1) else: # Try to extract after keywords # For links: "links in Biology", "references from Evolution" # For 
structure: "structure of Biology" # For get_article: "get article Biology" path_match = re.search( r"(?:article|entry|page|of|for|in|from|to)\s+([A-Za-z0-9_/.-]+)", query, re.IGNORECASE, ) if path_match: params["entry_path"] = path_match.group(1) elif intent == "suggestions": # Extract partial query suggest_match = re.search( r"(?:suggestions?|autocomplete|complete|hints?)\s+(?:for\s+)?['\"]?([^'\"]+)['\"]?", query, re.IGNORECASE, ) if suggest_match: params["partial_query"] = suggest_match.group(1).strip() elif intent == "search": # For general search, use the whole query or extract search term search_match = re.search( r"(?:search|find|look)\s+(?:for\s+)?['\"]?([^'\"]+)['\"]?", query, re.IGNORECASE, ) if search_match: params["query"] = search_match.group(1).strip() else: params["query"] = query return params class SimpleToolsHandler: """Handler for simple, intelligent MCP tools.""" def __init__(self, zim_operations: ZimOperations): """ Initialize simple tools handler. Args: zim_operations: ZimOperations instance for underlying operations """ self.zim_operations = zim_operations self.intent_parser = IntentParser() def handle_zim_query( self, query: str, zim_file_path: Optional[str] = None, options: Optional[Dict[str, Any]] = None, ) -> str: """ Handle a natural language query about ZIM file content. This is the main intelligent tool that routes queries to appropriate underlying operations based on intent parsing. Args: query: Natural language query zim_file_path: Optional path to ZIM file (auto-selects if not provided) options: Optional dict with advanced options (limit, offset, etc.) 
Returns: Response string with results """ try: options = options or {} # Parse intent from query intent, params = self.intent_parser.parse_intent(query) logger.info(f"Parsed intent: {intent}, params: {params}") # Handle file listing (doesn't require zim_file_path) if intent == "list_files": return self.zim_operations.list_zim_files() # Auto-select ZIM file if not provided if not zim_file_path: zim_file_path = self._auto_select_zim_file() if not zim_file_path: return ( "❌ **No ZIM File Specified**\n\n" "Please specify a ZIM file path, or ensure there is exactly one ZIM file available.\n\n" "**Available files:**\n" f"{self.zim_operations.list_zim_files()}" ) # Route to appropriate operation based on intent if intent == "metadata": return self.zim_operations.get_zim_metadata(zim_file_path) elif intent == "main_page": return self.zim_operations.get_main_page(zim_file_path) elif intent == "list_namespaces": return self.zim_operations.list_namespaces(zim_file_path) elif intent == "browse": namespace = params.get("namespace", "C") limit = options.get("limit", 50) offset = options.get("offset", 0) return self.zim_operations.browse_namespace( zim_file_path, namespace, limit, offset ) elif intent == "structure": entry_path = params.get("entry_path") if not entry_path: return ( "⚠️ **Missing Article Path**\n\n" "Please specify which article you want to get the structure for.\n" "**Example**: 'structure of Biology' or 'structure of \"C/Evolution\"'" ) return self.zim_operations.get_article_structure( zim_file_path, entry_path ) elif intent == "links": entry_path = params.get("entry_path") if not entry_path: return ( "⚠️ **Missing Article Path**\n\n" "Please specify which article you want to extract links from.\n" "**Example**: 'links in Biology' or 'links from \"C/Evolution\"'" ) return self.zim_operations.extract_article_links( zim_file_path, entry_path ) elif intent == "suggestions": partial_query = params.get("partial_query", "") if not partial_query: return ( "⚠️ **Missing 
Search Term**\n\n" "Please specify what you want suggestions for.\n" "**Example**: 'suggestions for bio' or 'autocomplete \"evol\"'" ) limit = options.get("limit", 10) return self.zim_operations.get_search_suggestions( zim_file_path, partial_query, limit ) elif intent == "filtered_search": search_query = params.get("query", query) namespace = params.get("namespace") content_type = params.get("content_type") limit = options.get("limit") offset = options.get("offset", 0) return self.zim_operations.search_with_filters( zim_file_path, search_query, namespace, content_type, limit, offset ) elif intent == "get_article": entry_path = params.get("entry_path") if not entry_path: # If no specific path, try to extract from query # Remove common words and use remainder as entry path cleaned_query = re.sub( r"\b(get|show|read|display|fetch|article|entry|page)\b", "", query, flags=re.IGNORECASE, ).strip() if cleaned_query: entry_path = cleaned_query else: return ( "⚠️ **Missing Article Path**\n\n" "Please specify which article you want to read.\n" "**Example**: 'get article Biology' or 'show \"C/Evolution\"'" ) max_content_length = options.get("max_content_length") return self.zim_operations.get_zim_entry( zim_file_path, entry_path, max_content_length ) elif intent == "search": search_query = params.get("query", query) limit = options.get("limit") offset = options.get("offset", 0) return self.zim_operations.search_zim_file( zim_file_path, search_query, limit, offset ) else: # Fallback to search return self.zim_operations.search_zim_file( zim_file_path, query, options.get("limit"), options.get("offset", 0) ) except Exception as e: logger.error(f"Error handling zim_query: {e}") return ( f"❌ **Error Processing Query**\n\n" f"**Query**: {query}\n" f"**Error**: {str(e)}\n\n" f"**Troubleshooting**:\n" f"1. Check that the ZIM file path is correct\n" f"2. Verify the query format\n" f"3. Try a simpler query\n" f"4. 
Check server logs for details" ) def _auto_select_zim_file(self) -> Optional[str]: """ Auto-select a ZIM file if only one is available. Returns: Path to ZIM file if exactly one exists, None otherwise """ try: import json files_result = self.zim_operations.list_zim_files() # Try to parse the JSON response # The list_zim_files returns a formatted string, so we need to extract the JSON if "[" in files_result and "]" in files_result: start = files_result.index("[") end = files_result.rindex("]") + 1 files_json = files_result[start:end] files = json.loads(files_json) if len(files) == 1: return str(files[0]["path"]) except Exception as e: logger.warning(f"Could not auto-select ZIM file: {e}") return None def handle_server_status(self, action: Optional[str] = None) -> str: """ Handle server management and diagnostics queries. This intelligent tool routes server management queries to appropriate underlying operations. Args: action: Optional action or natural language query (e.g., "health", "diagnose", "fix", or natural language) Returns: Response string with server status information """ try: # If no action specified, default to health check if not action: # Import here to avoid circular dependency from .server import OpenZimMcpServer # noqa: F401 # We need access to the server instance methods # This will be called from the server context # For now, return a helpful message return ( "ℹ️ **Server Status Tool**\n\n" "Please specify what you want to check:\n" "- 'health' or 'status' - Get server health and statistics\n" "- 'diagnose' or 'check' - Run comprehensive diagnostics\n" "- 'fix' or 'resolve' - Resolve server conflicts\n" "- 'config' or 'configuration' - Get server configuration\n\n" "**Example**: 'health' or 'diagnose server'" ) action_lower = action.lower() # Parse intent from action if any( keyword in action_lower for keyword in ["health", "status", "stats", "performance"] ): intent = "health" elif any( keyword in action_lower for keyword in ["diagnose", "check", 
"problems", "issues", "validate"] ): intent = "diagnose" elif any( keyword in action_lower for keyword in ["fix", "resolve", "conflicts", "cleanup", "repair"] ): intent = "resolve" elif any( keyword in action_lower for keyword in ["config", "configuration", "settings", "setup"] ): intent = "config" else: # Default to health check intent = "health" # Return intent information # The actual routing will be done in the server.py when registering tools return f"Intent: {intent}, Action: {action}" except Exception as e: logger.error(f"Error handling server_status: {e}") return ( f"❌ **Error Processing Server Status Query**\n\n" f"**Action**: {action}\n" f"**Error**: {str(e)}\n\n" f"**Troubleshooting**:\n" f"1. Try a simpler action like 'health' or 'diagnose'\n" f"2. Check server logs for details" )

Latest Blog Posts

Federated Learning with MCP: Building Privacy-Preserving Agents Across Distributed Edges
By Om-Shree-0709 on December 21, 2025.
Secure
mcp
Learning
What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat
MCP Moves to the Linux Foundation: Neutral Stewardship for Agentic Infrastructure
By Om-Shree-0709 on December 15, 2025.
mcp
anthropic
Linux Foundation

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cameronrye/openzim-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.