"""Pydantic configuration models."""
from __future__ import annotations
import os
from dataclasses import dataclass
from enum import StrEnum
from pathlib import Path
from typing import Any, Literal
import yaml
from pydantic import BaseModel, Field, computed_field, field_validator, model_validator
from local_deepwiki.config.processing_models import (
ASTCacheConfig,
ChunkingConfig,
EmbeddingBatchConfig,
_get_default_parallel_workers,
)
from local_deepwiki.config.prompts import (
RESEARCH_DECOMPOSITION_PROMPTS,
RESEARCH_GAP_ANALYSIS_PROMPTS,
RESEARCH_SYNTHESIS_PROMPTS,
WIKI_ARCHITECTURE_PROMPTS,
WIKI_FILE_PROMPTS,
WIKI_MODULE_PROMPTS,
WIKI_OVERVIEW_PROMPTS,
WIKI_SYSTEM_PROMPTS,
PromptsConfig,
ProviderPromptsConfig,
)
from local_deepwiki.config.provider_models import (
AnthropicConfig,
EmbeddingConfig,
LLMConfig,
LocalEmbeddingConfig,
OllamaConfig,
OpenAIEmbeddingConfig,
OpenAILLMConfig,
)
from local_deepwiki.models.provider_types import EmbeddingProviderType, LLMProviderType
class ResearchPreset(StrEnum):
"""Research mode presets for deep research pipeline."""
QUICK = "quick"
DEFAULT = "default"
THOROUGH = "thorough"
class GenerationMode(StrEnum):
"""Wiki page generation strategy."""
EAGER = "eager"
LAZY = "lazy"
HYBRID = "hybrid"
# Preset parameter values for each research mode
# Keys in each inner dict match DeepResearchConfig field names;
# DeepResearchConfig.with_preset() merges them via model_copy(update=...).
RESEARCH_PRESETS: dict[ResearchPreset, dict[str, Any]] = {
    # Minimal depth: least retrieval, lowest temperature, smallest budget.
    ResearchPreset.QUICK: {
        "max_sub_questions": 2,
        "chunks_per_subquestion": 3,
        "max_total_chunks": 15,
        "max_follow_up_queries": 1,
        "synthesis_temperature": 0.3,
        "synthesis_max_tokens": 2048,
    },
    # Mirrors the DeepResearchConfig field defaults exactly.
    ResearchPreset.DEFAULT: {
        "max_sub_questions": 4,
        "chunks_per_subquestion": 5,
        "max_total_chunks": 30,
        "max_follow_up_queries": 3,
        "synthesis_temperature": 0.5,
        "synthesis_max_tokens": 4096,
    },
    # Maximum depth: widest retrieval and double the default token budget.
    ResearchPreset.THOROUGH: {
        "max_sub_questions": 6,
        "chunks_per_subquestion": 8,
        "max_total_chunks": 50,
        "max_follow_up_queries": 5,
        "synthesis_temperature": 0.5,
        "synthesis_max_tokens": 8192,
    },
}
class ParsingConfig(BaseModel):
    """Code parsing configuration."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    # Language identifiers selecting which source files are parsed.
    languages: list[str] = Field(
        default=[
            "python",
            "typescript",
            "javascript",
            "go",
            "rust",
            "java",
            "c",
            "cpp",
            "swift",
            "ruby",
            "php",
            "kotlin",
            "csharp",
        ],
        description="Languages to parse",
    )
    # Upper bound in bytes on files considered for parsing (default 1 MiB).
    max_file_size: int = Field(
        default=1048576, description="Max file size in bytes (1MB)"
    )
    # Glob patterns skipped during parsing: dependency dirs, build output,
    # tool caches, coverage artifacts, and AI-assistant metadata directories.
    exclude_patterns: list[str] = Field(
        default=[
            "node_modules/**",
            "venv/**",
            ".venv/**",
            "__pycache__/**",
            ".git/**",
            "*.min.js",
            "*.min.css",
            "dist/**",
            "build/**",
            ".next/**",
            "target/**",
            "vendor/**",
            "htmlcov/**",
            ".pytest_cache/**",
            ".mypy_cache/**",
            ".ruff_cache/**",
            ".tox/**",
            ".nox/**",
            "coverage/**",
            ".coverage",
            "coverage_html/**",
            "coverage_openai_embeddings/**",
            ".claude/**",
            ".windsurf/**",
            ".cursor/**",
            ".aider/**",
            "agents/**",
            "AGENTS.md",
        ],
        description="Glob patterns to exclude",
    )
class WikiConfig(BaseModel):
    """Wiki generation configuration."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    max_file_docs: int = Field(
        default=500,
        description="Maximum number of file-level documentation pages to generate. "
        "Set to 0 for unlimited.",
    )
    # NOTE: a field_validator below additionally caps this at 2x CPU count,
    # so the effective value may be lower than what was configured.
    max_concurrent_llm_calls: int = Field(
        default=8,
        ge=1,
        le=20,
        description="Maximum concurrent LLM calls for file documentation generation. "
        "Higher values speed up generation but increase memory/API usage.",
    )
    # --- Cloud provider routing ---
    use_cloud_for_github: bool = Field(
        default=False,
        description="Use cloud LLM provider (Anthropic Claude) for GitHub repos. "
        "Provides faster, higher-quality documentation but requires API key.",
    )
    github_llm_provider: Literal["anthropic", "openai"] = Field(
        default="anthropic",
        description="Cloud LLM provider to use for GitHub repos when use_cloud_for_github is enabled.",
    )
    chat_llm_provider: Literal["default", "anthropic", "openai", "ollama"] = Field(
        default="default",
        description="LLM provider for chat Q&A. 'default' uses the main llm.provider setting. "
        "Set to 'anthropic' or 'openai' for higher-quality chat responses.",
    )
    # --- Search limits (validated together: fallback <= context) ---
    import_search_limit: int = Field(
        default=200,
        description="Maximum chunks to search for import/relationship analysis",
    )
    context_search_limit: int = Field(
        default=50,
        description="Maximum chunks to search for context when generating documentation",
    )
    fallback_search_limit: int = Field(
        default=30, description="Maximum chunks to search in fallback queries"
    )
    # --- Prompt sizing ---
    max_chunk_content_chars: int = Field(
        default=15000,
        ge=500,
        le=50000,
        description="Maximum characters of chunk content included in LLM prompts "
        "during wiki generation. Higher values produce more accurate documentation "
        "for large functions but increase token usage. The previous hardcoded limit "
        "was 1500.",
    )
    max_chunks_per_file: int = Field(
        default=60,
        ge=5,
        le=200,
        description="Maximum number of code chunks included in the LLM prompt when "
        "generating file-level documentation. Chunks are prioritized by type "
        "(functions/methods first, then classes, then module/imports) so the most "
        "documentation-relevant content is preserved when files exceed this limit.",
    )
    # --- Codemap generation ---
    codemap_enabled: bool = Field(
        default=True,
        description="Enable automatic codemap generation during wiki build. "
        "Generates execution-flow diagrams for high-value entry points.",
    )
    codemap_max_topics: int = Field(
        default=5,
        ge=0,
        le=20,
        description="Maximum number of codemap topics to auto-generate (0 to disable).",
    )
    codemap_max_depth: int = Field(
        default=4,
        ge=1,
        le=10,
        description="BFS traversal depth for codemap generation.",
    )
    codemap_max_nodes: int = Field(
        default=30,
        ge=5,
        le=60,
        description="Maximum nodes per codemap graph.",
    )
    # --- Page generation strategy (see GenerationMode) ---
    generation_mode: GenerationMode = Field(
        default=GenerationMode.EAGER,
        description="Wiki page generation strategy. "
        "'eager' (default): generate all pages during indexing. "
        "'lazy': generate pages on first read. "
        "'hybrid': generate summary pages and top N files at index time, rest on demand.",
    )
    hybrid_eager_pages: int = Field(
        default=10,
        ge=0,
        le=50,
        description="Number of top file pages to eagerly generate in hybrid mode.",
    )
    # --- Predictive prefetch (only active in lazy/hybrid modes) ---
    prefetch_workers: int = Field(
        default=2,
        ge=0,
        le=5,
        description="Background workers for predictive page prefetch (0 disables). "
        "Only active in lazy and hybrid modes.",
    )
    prefetch_max_queue: int = Field(
        default=20,
        ge=0,
        le=100,
        description="Maximum pages in the prefetch queue.",
    )
    prefetch_drain: bool = Field(
        default=False,
        description="When true, prefetch workers will eventually generate all "
        "remaining pages after prediction queue drains and system is idle.",
    )
    drain_idle_seconds: int = Field(
        default=30,
        ge=5,
        le=300,
        description="Seconds of idle time before drain mode starts backfilling.",
    )
    @field_validator("max_concurrent_llm_calls")
    @classmethod
    def validate_max_concurrent_llm_calls(cls, v: int) -> int:
        """Validate max_concurrent_llm_calls is reasonable.

        The >=1 guard is redundant with the field's ge=1 constraint but kept
        defensively. Values above twice the CPU count are silently capped
        rather than rejected.
        """
        if v < 1:
            raise ValueError("max_concurrent_llm_calls must be >= 1")
        # os.cpu_count() can return None; assume 4 cores in that case.
        cpu_count = os.cpu_count() or 4
        return min(v, cpu_count * 2)
    @model_validator(mode="after")
    def validate_search_limits(self) -> "WikiConfig":
        """Validate search limits are consistent.

        Raises:
            ValueError: If fallback_search_limit exceeds context_search_limit.
        """
        if self.fallback_search_limit > self.context_search_limit:
            raise ValueError(
                f"fallback_search_limit ({self.fallback_search_limit}) should not exceed "
                f"context_search_limit ({self.context_search_limit})"
            )
        return self
class DeepResearchConfig(BaseModel):
    """Deep research pipeline configuration.

    Field defaults match RESEARCH_PRESETS[ResearchPreset.DEFAULT]; presets
    are applied on top of an instance via with_preset().
    """
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    max_sub_questions: int = Field(
        default=4,
        ge=1,
        le=10,
        description="Maximum sub-questions generated from query decomposition",
    )
    chunks_per_subquestion: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Code chunks retrieved per sub-question",
    )
    max_total_chunks: int = Field(
        default=30,
        ge=10,
        le=100,
        description="Maximum total chunks used in synthesis",
    )
    max_follow_up_queries: int = Field(
        default=3,
        ge=0,
        le=10,
        description="Maximum follow-up queries from gap analysis",
    )
    synthesis_temperature: float = Field(
        default=0.5,
        ge=0.0,
        le=2.0,
        description="LLM temperature for synthesis (higher = more creative)",
    )
    synthesis_max_tokens: int = Field(
        default=4096,
        ge=512,
        le=16000,
        description="Maximum tokens in synthesis response",
    )
    def with_preset(self, preset: ResearchPreset | str | None) -> "DeepResearchConfig":
        """Return a new config with preset values applied.

        The preset values override the current config values. If preset is None,
        "default", or an unrecognized preset name, returns a copy of the current
        config unchanged.

        Args:
            preset: The research preset to apply ("quick", "default", "thorough").

        Returns:
            A new DeepResearchConfig with preset values applied.
        """
        if preset is None:
            return self.model_copy()
        # Convert string to enum if needed (case-insensitive)
        if isinstance(preset, str):
            try:
                preset = ResearchPreset(preset.lower())
            except ValueError:
                # Invalid preset name, return unchanged
                return self.model_copy()
        if preset == ResearchPreset.DEFAULT:
            return self.model_copy()
        # Get preset values and merge with current config; preset dict keys
        # match this model's field names, so model_copy applies them directly.
        preset_values = RESEARCH_PRESETS.get(preset, {})
        return self.model_copy(update=preset_values)
class PluginsConfig(BaseModel):
    """Plugin system configuration."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    # Master switch for the whole plugin system.
    enabled: bool = Field(default=True, description="Enable plugin system")
    custom_dir: str | None = Field(
        default=None,
        description="Custom plugins directory path. Plugins in this directory "
        "are loaded in addition to repo and user plugins.",
    )
    disable_entry_points: bool = Field(
        default=False,
        description="Disable loading plugins from setuptools entry points",
    )
class HooksConfig(BaseModel):
    """Event hooks configuration."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    # Master switch for the event hook system.
    enabled: bool = Field(default=True, description="Enable event hooks system")
    scripts_dir: str | None = Field(
        default=None,
        description="Directory containing hook scripts. Scripts are named by event type "
        "(e.g., index.complete.sh, wiki.page.complete.py).",
    )
    # Wall-clock budget per hook script, in seconds.
    timeout_seconds: int = Field(
        default=30,
        ge=1,
        le=300,
        description="Maximum execution time for hook scripts in seconds",
    )
class ExportBatchConfig(BaseModel):
    """Export configuration for HTML and PDF generation."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    # Only relevant when streaming mode is active (see enable_streaming).
    batch_size: int = Field(
        default=50,
        ge=1,
        le=500,
        description="Pages per batch for PDF generation in streaming mode",
    )
    memory_limit_mb: int = Field(
        default=500,
        ge=100,
        le=4096,
        description="Memory threshold to trigger streaming mode (MB). "
        "Wikis larger than this will use streaming export.",
    )
    enable_streaming: bool = Field(
        default=True,
        description="Enable streaming mode for large wikis. "
        "When enabled, pages are processed one at a time to avoid OOM.",
    )
class OutputConfig(BaseModel):
    """Filesystem layout for generated output artifacts.

    Consumed by Config.get_wiki_path() / Config.get_vector_db_path() to
    resolve output locations relative to a repository root.
    """

    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}

    # Directory name (relative to the repo root) holding generated wiki output.
    wiki_dir: str = Field(
        default=".deepwiki", description="Wiki output directory name"
    )
    # Filename of the vector database stored inside the wiki directory.
    vector_db_name: str = Field(default="vectors.lance", description="Vector DB filename")
class EmbeddingCacheConfig(BaseModel):
    """Embedding cache configuration."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    enabled: bool = Field(default=True, description="Enable embedding caching")
    ttl_seconds: int = Field(
        default=604800,  # 7 days
        ge=60,
        le=2592000,  # 30 days max
        description="Cache TTL in seconds (default: 7 days)",
    )
    # When exceeded, a cleanup pass runs (eviction policy lives in the cache impl).
    max_entries: int = Field(
        default=100000,
        ge=1000,
        le=1000000,
        description="Maximum cache entries before cleanup (default: 100k)",
    )
class LLMCacheConfig(BaseModel):
    """LLM response caching configuration."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    enabled: bool = Field(default=True, description="Enable LLM response caching")
    ttl_seconds: int = Field(
        default=604800,  # 7 days
        ge=60,
        le=2592000,  # 30 days max
        description="Cache TTL in seconds (default: 7 days)",
    )
    max_entries: int = Field(
        default=10000,
        ge=100,
        le=100000,
        description="Maximum cache entries before eviction",
    )
    # Prompts more similar than this threshold are treated as cache hits.
    similarity_threshold: float = Field(
        default=0.95,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score for cache hit (0.0-1.0)",
    )
    # Responses generated above this temperature are never cached, since
    # high-temperature output is non-deterministic.
    max_cacheable_temperature: float = Field(
        default=0.3,
        ge=0.0,
        le=2.0,
        description="Maximum temperature to cache (higher = non-deterministic)",
    )
class SearchCacheConfig(BaseModel):
    """Search result caching configuration for vector store."""
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    enabled: bool = Field(default=True, description="Enable search result caching")
    # Much shorter TTL than the embedding/LLM caches: search results go stale
    # as soon as the index changes.
    ttl_seconds: int = Field(
        default=3600,  # 1 hour
        ge=60,
        le=86400,  # 24 hours max
        description="Cache TTL in seconds (default: 1 hour)",
    )
    max_entries: int = Field(
        default=1000,
        ge=100,
        le=10000,
        description="Maximum cache entries before eviction",
    )
    # Queries more similar than this threshold reuse a cached result set.
    similarity_threshold: float = Field(
        default=0.95,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score for semantic cache hit (0.0-1.0)",
    )
class SearchConfig(BaseModel):
    """Search behavior configuration for precision/recall trade-offs.

    Controls search profiles and adaptive search depth estimation.
    """
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    default_profile: Literal["fast", "balanced", "thorough"] = Field(
        default="balanced",
        description="Default search profile for precision/recall trade-off. "
        "'fast' = fewer candidates, faster response; "
        "'balanced' = default behavior, good balance; "
        "'thorough' = exhaustive search, best recall but slower.",
    )
    adaptive_search_enabled: bool = Field(
        default=True,
        description="Enable adaptive search depth estimation. "
        "When enabled, search depth adjusts based on query complexity and history.",
    )
    # Per-profile minimum similarity cutoffs: the stricter the profile, the
    # higher the threshold (fast 0.3 > balanced 0.2 > thorough 0.1).
    fast_min_similarity: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold for 'fast' profile (0.0-1.0).",
    )
    balanced_min_similarity: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold for 'balanced' profile (0.0-1.0).",
    )
    thorough_min_similarity: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold for 'thorough' profile (0.0-1.0).",
    )
class LazyIndexConfig(BaseModel):
    """Lazy vector index configuration for deferred index creation.

    When enabled, vector indexes are not created immediately when the table
    reaches the minimum row threshold. Instead, index creation is scheduled
    as a background task after initial indexing completes, or triggered
    on-demand when search latency exceeds the threshold.
    """
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    enabled: bool = Field(
        default=True,
        description="Enable lazy/deferred vector index creation. "
        "When enabled, indexes are created in the background after initial indexing.",
    )
    # Triggers on-demand index creation when the rolling average (over
    # latency_window_size searches) exceeds this value.
    latency_threshold_ms: int = Field(
        default=500,
        ge=50,
        le=5000,
        description="Search latency threshold in milliseconds. "
        "If average latency exceeds this, index creation is triggered automatically.",
    )
    min_rows: int = Field(
        default=1000,
        ge=100,
        le=100000,
        description="Minimum number of rows before considering index creation. "
        "Tables smaller than this threshold use brute-force search.",
    )
    latency_window_size: int = Field(
        default=10,
        ge=3,
        le=100,
        description="Number of recent searches to consider for latency calculation.",
    )
class FuzzySearchConfig(BaseModel):
    """Fuzzy search configuration for typo-tolerant code search.

    When semantic search results have low similarity scores, fuzzy matching
    can be automatically enabled to provide "Did you mean?" suggestions
    based on function/class names in the codebase.
    """
    # Frozen: mutate via model_copy(update=...), never in place.
    model_config = {"frozen": True}
    # Only used when enable_auto_fuzzy is true.
    auto_fuzzy_threshold: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Similarity score threshold below which fuzzy matching is auto-enabled. "
        "When the best result has a score below this threshold, fuzzy suggestions are generated.",
    )
    suggestion_threshold: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Minimum fuzzy similarity score (0.0-1.0) for a name to be included "
        "in 'Did you mean?' suggestions.",
    )
    max_suggestions: int = Field(
        default=3,
        ge=1,
        le=10,
        description="Maximum number of 'Did you mean?' suggestions to return.",
    )
    enable_auto_fuzzy: bool = Field(
        default=True,
        description="Enable automatic fuzzy fallback when semantic results are poor. "
        "When disabled, fuzzy matching is only used if explicitly requested.",
    )
class Config(BaseModel):
    """Main configuration.

    This class and all nested config classes are frozen (immutable) to prevent
    accidental mutation of shared configuration state. Use model_copy(update={...})
    or the with_*() helper methods to create modified copies.
    """
    model_config = {"frozen": True}
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    embedding_cache: EmbeddingCacheConfig = Field(default_factory=EmbeddingCacheConfig)
    embedding_batch: EmbeddingBatchConfig = Field(default_factory=EmbeddingBatchConfig)
    llm: LLMConfig = Field(default_factory=LLMConfig)
    llm_cache: LLMCacheConfig = Field(default_factory=LLMCacheConfig)
    search_cache: SearchCacheConfig = Field(default_factory=SearchCacheConfig)
    search: SearchConfig = Field(default_factory=SearchConfig)
    lazy_index: LazyIndexConfig = Field(default_factory=LazyIndexConfig)
    fuzzy_search: FuzzySearchConfig = Field(default_factory=FuzzySearchConfig)
    parsing: ParsingConfig = Field(default_factory=ParsingConfig)
    ast_cache: ASTCacheConfig = Field(default_factory=ASTCacheConfig)
    chunking: ChunkingConfig = Field(default_factory=ChunkingConfig)
    wiki: WikiConfig = Field(default_factory=WikiConfig)
    deep_research: DeepResearchConfig = Field(default_factory=DeepResearchConfig)
    output: OutputConfig = Field(default_factory=OutputConfig)
    export: ExportBatchConfig = Field(default_factory=ExportBatchConfig)
    prompts: PromptsConfig = Field(default_factory=PromptsConfig)
    plugins: PluginsConfig = Field(default_factory=PluginsConfig)
    hooks: HooksConfig = Field(default_factory=HooksConfig)
    @computed_field
    @property
    def effective_embedding_batch_size(self) -> int:
        """Compute optimal batch size based on provider and memory.

        Local providers can handle larger batches, while API providers
        should use smaller batches to avoid rate limits and timeouts.

        Returns:
            Optimal batch size for the current embedding provider.
        """
        base_batch_size = self.embedding_batch.batch_size
        if self.embedding.provider == EmbeddingProviderType.LOCAL:
            # Local models benefit from larger batches for throughput;
            # cap at 200 to bound memory usage.
            return min(base_batch_size, 200)
        # API providers need smaller batches to avoid rate limits.
        return min(base_batch_size, 50)
    @computed_field
    @property
    def effective_max_workers(self) -> int:
        """Compute worker count based on CPU cores.

        Ensures we do not exceed available CPU cores while respecting
        user configuration.

        Returns:
            Optimal worker count for parallel processing.
        """
        # os.cpu_count() can return None; assume 4 cores in that case.
        cpu_count = os.cpu_count() or 4
        configured_workers = self.chunking.parallel_workers
        # Do not exceed CPU count, but also consider configured maximum.
        return min(configured_workers, cpu_count)
    @computed_field
    @property
    def effective_llm_concurrency(self) -> int:
        """Compute effective LLM concurrency based on provider.

        Local models (Ollama) run on a single GPU and benefit from limited
        parallelism (2-3 concurrent requests). Cloud providers handle higher
        concurrency but may have rate limits.

        Returns:
            Optimal LLM concurrency for the current provider.
        """
        base_concurrency = self.wiki.max_concurrent_llm_calls
        # Local models: single GPU, limit to 2-3 to avoid OOM/thrashing.
        if self.llm.provider == LLMProviderType.OLLAMA:
            return min(base_concurrency, 3)
        # Cloud providers: allow higher concurrency, cap at configured limit.
        return base_concurrency
    def with_embedding_provider(
        self, provider: EmbeddingProviderType | str
    ) -> "Config":
        """Return a new Config with the embedding provider changed.

        Args:
            provider: The embedding provider to use.

        Returns:
            A new Config instance with the updated embedding provider.
        """
        new_embedding = self.embedding.model_copy(update={"provider": provider})
        return self.model_copy(update={"embedding": new_embedding})
    def with_llm_provider(self, provider: LLMProviderType | str) -> "Config":
        """Return a new Config with the LLM provider changed.

        Args:
            provider: The LLM provider to use.

        Returns:
            A new Config instance with the updated LLM provider.
        """
        new_llm = self.llm.model_copy(update={"provider": provider})
        return self.model_copy(update={"llm": new_llm})
    def get_prompts(self) -> ProviderPromptsConfig:
        """Get prompts for the currently configured LLM provider.

        Returns:
            ProviderPromptsConfig for the current LLM provider.
        """
        return self.prompts.get_for_provider(self.llm.provider)
    @classmethod
    def load(cls, config_path: Path | None = None) -> "Config":
        """Load configuration from file or defaults.

        Searches, in order: the explicit config_path (if provided and it
        exists), then ~/.config/local-deepwiki/config.yaml, then
        ~/.local-deepwiki.yaml. The first existing file wins; when none
        exists, a default-valued Config is returned.

        Args:
            config_path: Optional explicit path to a YAML config file.

        Returns:
            A validated Config instance.
        """
        candidates: list[Path] = []
        if config_path is not None:
            # Explicit path takes precedence; a missing explicit path falls
            # through to the default locations (matches prior behavior).
            candidates.append(config_path)
        candidates.extend(
            [
                Path.home() / ".config" / "local-deepwiki" / "config.yaml",
                Path.home() / ".local-deepwiki.yaml",
            ]
        )
        for path in candidates:
            if path.exists():
                with open(path) as f:
                    # yaml.safe_load returns None for an empty document;
                    # treat that as "no overrides" instead of failing
                    # validation with model_validate(None).
                    data = yaml.safe_load(f) or {}
                return cls.model_validate(data)
        return cls()
    def get_wiki_path(self, repo_path: Path) -> Path:
        """Get the wiki output path for a repository.

        Args:
            repo_path: Root directory of the repository.

        Returns:
            repo_path joined with the configured wiki directory name.
        """
        return repo_path / self.output.wiki_dir
    def get_vector_db_path(self, repo_path: Path) -> Path:
        """Get the vector database path for a repository.

        Args:
            repo_path: Root directory of the repository.

        Returns:
            Path to the vector DB file inside the wiki directory.
        """
        return self.get_wiki_path(repo_path) / self.output.vector_db_name