#!/usr/bin/env python3
"""
Configuration loader with smart defaults for dash-mcp
"""
import yaml
import sqlite3
from pathlib import Path
from .types import (
LanguageConfig,
DocsetConfig,
ProcessedDocsetConfig,
ProcessedLanguageConfig,
DefaultConfig,
)
class ConfigLoader:
"""Load and process docset configurations with smart defaults"""
# Default values applied to all configs
DEFAULTS: DefaultConfig = {
"format": "tarix",
"enabled": True,
"framework_pattern": "",
"language_defaults": {"filter": "", "prefix": ""},
}
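    # Example: a YAML config that omits "format" or "enabled" resolves to
    # format="tarix" and enabled=True via _apply_defaults below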
def __init__(self, config_dir: Path | None = None):
if config_dir is None:
config_dir = Path(__file__).parent / "docsets"
self.config_dir = config_dir
def load_config(self, docset_name: str) -> ProcessedDocsetConfig:
"""Load a single docset configuration with defaults applied"""
config_file = self.config_dir / f"{docset_name}.yaml"
if not config_file.exists():
raise FileNotFoundError(f"Config not found: {config_file}")
        with open(config_file, "r", encoding="utf-8") as f:
            raw_config = yaml.safe_load(f)
        if not isinstance(raw_config, dict):
            raise ValueError(f"Invalid config (expected a mapping): {config_file}")
        return self._apply_defaults(raw_config)
def load_all_configs(
self, additional_docset_paths: list[str] | None = None
) -> dict[str, ProcessedDocsetConfig]:
"""Load all docset configurations, including auto-detected ones"""
configs: dict[str, ProcessedDocsetConfig] = {}
# Load existing YAML configs
for config_file in self.config_dir.glob("*.yaml"):
docset_name = config_file.stem
try:
configs[docset_name] = self.load_config(docset_name)
except Exception as e:
print(f"Warning: Failed to load {docset_name}: {e}")
# Auto-detect docsets in additional paths
if additional_docset_paths:
auto_detected = self._discover_docsets(additional_docset_paths)
# Only add auto-detected if they don't have explicit configs
for docset_name, config in auto_detected.items():
if docset_name not in configs:
configs[docset_name] = config
return configs
def _discover_docsets(
self, search_paths: list[str]
) -> dict[str, ProcessedDocsetConfig]:
"""Discover and auto-configure docsets in the given paths"""
discovered: dict[str, ProcessedDocsetConfig] = {}
for search_path in search_paths:
            path = Path(search_path).expanduser()
if not path.exists():
continue
# Look for .docset directories
for docset_path in path.glob("*.docset"):
docset_name = (
docset_path.stem.lower().replace(".", "").replace("-", "_")
)
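                # e.g. "Node.js.docset" -> "nodejs",
                #      "Ruby-on-Rails.docset" -> "ruby_on_rails"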
                # Skip if we already found this docset (the first path searched
                # wins when two docsets normalize to the same name)
if docset_name in discovered:
continue
try:
config = self._generate_config_from_docset(docset_path)
if config:
discovered[docset_name] = config
except Exception as e:
print(f"Warning: Failed to auto-detect {docset_path.name}: {e}")
return discovered
def _generate_config_from_docset(
self, docset_path: Path
) -> ProcessedDocsetConfig | None:
"""Generate a configuration by analyzing a docset"""
# Check if it's a valid docset structure
resources = docset_path / "Contents" / "Resources"
if not resources.exists():
return None
        # Locate the search index database; a tarix-packed docset ships an
        # optimized index next to tarix.tgz, while an extracted docset keeps
        # its index in docSet.dsidx
        optimized_db = resources / "optimizedIndex.dsidx"
        docset_db = resources / "docSet.dsidx"
        tarix_file = resources / "tarix.tgz"
        # Pick the database to analyze; the "format" field itself is filled in
        # later by _apply_defaults
        if tarix_file.exists() and optimized_db.exists():
            db_path = optimized_db
        elif docset_db.exists():
            db_path = docset_db
        else:
            return None
# Extract info from database
name = docset_path.stem
docset_info = self._analyze_docset_database(db_path)
# Create config
raw_config: DocsetConfig = {
"name": name,
"docset_path": docset_path.name,
"description": f"Auto-detected {name} documentation",
"languages": docset_info.get(
"languages", ["javascript"]
), # Default fallback
"types": docset_info.get("types", []),
}
return self._apply_defaults(raw_config)
def _analyze_docset_database(self, db_path: Path) -> dict[str, list[str]]:
"""Analyze docset database to extract types and infer languages"""
info: dict[str, list[str]] = {"types": [], "languages": []}
        conn: sqlite3.Connection | None = None
        try:
            conn = sqlite3.connect(db_path)
            cursor = conn.cursor()
# Get distinct types
cursor.execute(
"SELECT DISTINCT type FROM searchIndex WHERE type IS NOT NULL"
)
types = [row[0] for row in cursor.fetchall()]
info["types"] = types
# Infer languages based on common patterns
languages: set[str] = set()
# Sample some entries to look for language clues
cursor.execute("SELECT name, type FROM searchIndex LIMIT 100")
entries = cursor.fetchall()
            # Look for language-specific patterns via loose substring checks;
            # short tokens like "js", "py", and "ts" match aggressively, so
            # expect false positives
for name, _ in entries:
name_lower = name.lower()
# JavaScript/TypeScript patterns
if any(
pattern in name_lower
for pattern in [
"js",
"javascript",
"node",
"npm",
"react",
"vue",
"angular",
]
):
languages.add("javascript")
if any(pattern in name_lower for pattern in ["ts", "typescript"]):
languages.add("typescript")
# Python patterns
if any(
pattern in name_lower
for pattern in [
"python",
"py",
"django",
"flask",
"pandas",
"numpy",
]
):
languages.add("python")
# Other common languages
if any(
pattern in name_lower for pattern in ["java", "spring", "android"]
):
languages.add("java")
if any(
pattern in name_lower
for pattern in ["swift", "ios", "macos", "cocoa"]
):
languages.add("swift")
if any(
pattern in name_lower for pattern in ["php", "laravel", "symfony"]
):
languages.add("php")
# Default to common web languages if nothing detected
if not languages:
languages = {"javascript", "typescript"}
info["languages"] = list(languages)
        except Exception as e:
            print(f"Warning: Could not analyze database {db_path}: {e}")
            # Fall back to sensible defaults when the index cannot be read
            info = {
                "types": ["Guide", "Method", "Class"],
                "languages": ["javascript", "typescript"],
            }
        finally:
            # Always release the connection, even if analysis failed
            if conn is not None:
                conn.close()
return info
def _apply_defaults(self, config: DocsetConfig) -> ProcessedDocsetConfig:
"""Apply default values to a configuration"""
# Process languages (required field)
languages = self._process_languages(config["languages"])
# Process types
types_raw = config.get("types") or []
types = self._process_types(types_raw)
# Build result with all required fields
result: ProcessedDocsetConfig = {
"name": config["name"],
"docset_path": config["docset_path"],
"format": config.get("format", self.DEFAULTS["format"]),
"enabled": config.get("enabled", self.DEFAULTS["enabled"]),
"framework_pattern": config.get(
"framework_pattern", self.DEFAULTS["framework_pattern"]
),
"languages": languages,
"types": types,
"description": config.get("description"),
"primary_language": config.get("primary_language"),
}
return result
def _process_languages(
self, languages: list[str] | dict[str, str | LanguageConfig]
) -> dict[str, ProcessedLanguageConfig]:
"""Process language configuration, applying defaults for simple syntax"""
if isinstance(languages, list):
# Simple syntax: languages: ["python", "javascript"]
result: dict[str, ProcessedLanguageConfig] = {}
for lang in languages:
result[lang] = ProcessedLanguageConfig(
filter=self.DEFAULTS["language_defaults"]["filter"],
prefix=self.DEFAULTS["language_defaults"]["prefix"],
)
return result
        else:
            # Full syntax: a mapping of language -> config; apply defaults for
            # any keys a language omits
            processed_result: dict[str, ProcessedLanguageConfig] = {}
for lang, lang_config in languages.items():
if isinstance(lang_config, dict):
# Full config provided
processed_result[lang] = ProcessedLanguageConfig(
filter=lang_config.get(
"filter", self.DEFAULTS["language_defaults"]["filter"]
),
prefix=lang_config.get(
"prefix", self.DEFAULTS["language_defaults"]["prefix"]
),
)
else:
# Simple string or other format
processed_result[lang] = ProcessedLanguageConfig(
filter=self.DEFAULTS["language_defaults"]["filter"],
prefix=self.DEFAULTS["language_defaults"]["prefix"],
)
return processed_result
def _process_types(self, types: dict[str, int] | list[str]) -> dict[str, int]:
"""Process types configuration, supporting both dict and array formats"""
if isinstance(types, list):
# Array format: convert to numbered dict with 0-based indexing
result = {type_name: index for index, type_name in enumerate(types)}
        else:
            # Dict format: copy so the caller's mapping isn't mutated when
            # "default" is added below
            result = types.copy()
# Automatically add 'default' with next available number if not present
if "default" not in result:
max_priority = max(result.values()) if result else -1
result["default"] = max_priority + 1
return result
def create_simplified_config(
name: str,
docset_path: str,
languages: list[str],
types: dict[str, int],
**overrides: str | bool,
) -> DocsetConfig:
"""Helper function to create a simplified config"""
# Start with required fields
config: dict[str, str | list[str] | dict[str, int] | bool] = {
"name": name,
"docset_path": docset_path,
"languages": languages,
"types": types,
}
# Add any overrides (for complex cases)
for key, value in overrides.items():
if key in {
"description",
"primary_language",
"format",
"framework_pattern",
"enabled",
}:
config[key] = value
# Cast to DocsetConfig - this is safe because we've set all required fields
return config # type: ignore[return-value]
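# Sketch of use (all values illustrative):
#   cfg = create_simplified_config(
#       "nodejs", "NodeJS.docset", ["javascript"], {"Method": 0},
#       description="Node.js API docs",
#   )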
if __name__ == "__main__":
# Test the config loader
loader = ConfigLoader()
# Test loading a specific config
try:
config = loader.load_config("nodejs")
print("NodeJS config:")
print(yaml.dump(config, default_flow_style=False))
except Exception as e:
print(f"Error: {e}")
# Count total configs
all_configs = loader.load_all_configs()
print(f"\nLoaded {len(all_configs)} configurations successfully")