"""Configuration validation for local-deepwiki CLI."""
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import yaml
from pydantic import ValidationError
from local_deepwiki.config import Config
@dataclass(slots=True)
class ValidationIssue:
"""Represents a configuration validation issue."""
level: str # "error" or "warning"
category: str
message: str
suggestion: str | None = None
class ConfigValidator:
"""Validates local-deepwiki configuration."""
def __init__(self, config_path: Path | None = None):
self.config_path = config_path
self.issues: list[ValidationIssue] = []
self.config: Config | None = None
self.raw_config: dict[str, Any] | None = None
def validate(self) -> bool:
"""Run all validations and return True if config is valid (no errors)."""
self.issues = []
# Step 1: Find and parse config file
if not self._load_config():
return False
# Step 2: Validate with Pydantic
if not self._validate_schema():
return False
# Step 3: Semantic validations
self._validate_llm_provider()
self._validate_embedding_provider()
self._validate_wiki_settings()
self._validate_paths()
self._validate_performance_settings()
# Return True if no errors (warnings are OK)
return not any(issue.level == "error" for issue in self.issues)
def _load_config(self) -> bool:
"""Load and parse the config file."""
config_locations = []
if self.config_path:
config_locations.append(self.config_path)
else:
# Check default locations
config_locations = [
Path.cwd() / "config.yaml",
Path.cwd() / ".local-deepwiki.yaml",
Path.home() / ".config" / "local-deepwiki" / "config.yaml",
Path.home() / ".local-deepwiki.yaml",
]
found_path = None
for path in config_locations:
if path.exists():
found_path = path
break
if found_path is None:
if self.config_path:
self.issues.append(
ValidationIssue(
level="error",
category="File",
message=f"Config file not found: {self.config_path}",
suggestion="Check the file path or create a config file",
)
)
else:
# No config file is OK - will use defaults
self.config = Config()
self.raw_config = {}
return True
return False
self.config_path = found_path
try:
with open(found_path) as f:
content = f.read()
if not content.strip():
# Empty file - use defaults
self.config = Config()
self.raw_config = {}
return True
self.raw_config = yaml.safe_load(content) or {}
except yaml.YAMLError as e:
self.issues.append(
ValidationIssue(
level="error",
category="YAML Syntax",
message=f"Invalid YAML syntax: {e}",
suggestion="Check YAML formatting (indentation, colons, etc.)",
)
)
return False
except OSError as e:
self.issues.append(
ValidationIssue(
level="error",
category="File",
message=f"Cannot read config file: {e}",
)
)
return False
return True
def _validate_schema(self) -> bool:
"""Validate config against Pydantic schema."""
try:
self.config = Config.model_validate(self.raw_config)
return True
except ValidationError as e:
for error in e.errors():
location = " -> ".join(str(loc) for loc in error["loc"])
self.issues.append(
ValidationIssue(
level="error",
category="Schema",
message=f"{location}: {error['msg']}",
suggestion=f"Expected type: {error.get('type', 'unknown')}",
)
)
return False
def _validate_llm_provider(self) -> None:
"""Validate LLM provider configuration."""
if self.config is None:
return
provider = self.config.llm.provider
if provider == "anthropic":
api_key = os.environ.get("ANTHROPIC_API_KEY")
if not api_key:
self.issues.append(
ValidationIssue(
level="error",
category="LLM Provider",
message="ANTHROPIC_API_KEY environment variable not set",
suggestion="Set ANTHROPIC_API_KEY or switch to 'ollama' provider",
)
)
elif not api_key.startswith("sk-ant-"):
self.issues.append(
ValidationIssue(
level="warning",
category="LLM Provider",
message="ANTHROPIC_API_KEY does not match expected format (sk-ant-...)",
suggestion="Verify your API key is correct",
)
)
elif provider == "openai":
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
self.issues.append(
ValidationIssue(
level="error",
category="LLM Provider",
message="OPENAI_API_KEY environment variable not set",
suggestion="Set OPENAI_API_KEY or switch to 'ollama' provider",
)
)
elif not api_key.startswith("sk-"):
self.issues.append(
ValidationIssue(
level="warning",
category="LLM Provider",
message="OPENAI_API_KEY does not match expected format (sk-...)",
suggestion="Verify your API key is correct",
)
)
elif provider == "ollama":
base_url = self.config.llm.ollama.base_url
# Check if Ollama is likely accessible
if "localhost" in base_url or "127.0.0.1" in base_url:
self.issues.append(
ValidationIssue(
level="warning",
category="LLM Provider",
message=f"Ollama configured at {base_url}",
suggestion="Ensure Ollama is running: `ollama serve`",
)
)
def _validate_embedding_provider(self) -> None:
"""Validate embedding provider configuration."""
if self.config is None:
return
provider = self.config.embedding.provider
if provider == "openai":
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
self.issues.append(
ValidationIssue(
level="error",
category="Embedding Provider",
message="OPENAI_API_KEY environment variable not set",
suggestion="Set OPENAI_API_KEY or switch to 'local' embedding provider",
)
)
elif provider == "local":
model = self.config.embedding.local.model
# Check for common model names
if model not in [
"all-MiniLM-L6-v2",
"all-mpnet-base-v2",
"paraphrase-multilingual-MiniLM-L12-v2",
]:
self.issues.append(
ValidationIssue(
level="warning",
category="Embedding Provider",
message=f"Using custom embedding model: {model}",
suggestion="Ensure this model is available from sentence-transformers",
)
)
def _validate_wiki_settings(self) -> None:
"""Validate wiki generation settings."""
if self.config is None:
return
wiki = self.config.wiki
# Check cloud provider for GitHub
if wiki.use_cloud_for_github:
if wiki.github_llm_provider == "anthropic":
if not os.environ.get("ANTHROPIC_API_KEY"):
self.issues.append(
ValidationIssue(
level="error",
category="Wiki Settings",
message="use_cloud_for_github enabled but ANTHROPIC_API_KEY not set",
suggestion="Set ANTHROPIC_API_KEY or disable use_cloud_for_github",
)
)
elif wiki.github_llm_provider == "openai":
if not os.environ.get("OPENAI_API_KEY"):
self.issues.append(
ValidationIssue(
level="error",
category="Wiki Settings",
message="use_cloud_for_github enabled but OPENAI_API_KEY not set",
suggestion="Set OPENAI_API_KEY or disable use_cloud_for_github",
)
)
# Check chat provider
chat_provider = wiki.chat_llm_provider
if chat_provider not in ("default", self.config.llm.provider):
if chat_provider == "anthropic" and not os.environ.get("ANTHROPIC_API_KEY"):
self.issues.append(
ValidationIssue(
level="error",
category="Wiki Settings",
message=f"chat_llm_provider is '{chat_provider}' but API key not set",
suggestion="Set ANTHROPIC_API_KEY or use 'default' provider",
)
)
elif chat_provider == "openai" and not os.environ.get("OPENAI_API_KEY"):
self.issues.append(
ValidationIssue(
level="error",
category="Wiki Settings",
message=f"chat_llm_provider is '{chat_provider}' but API key not set",
suggestion="Set OPENAI_API_KEY or use 'default' provider",
)
)
# Performance warnings
if wiki.max_concurrent_llm_calls > 10:
self.issues.append(
ValidationIssue(
level="warning",
category="Wiki Settings",
message=f"max_concurrent_llm_calls is {wiki.max_concurrent_llm_calls}",
suggestion="High values may cause rate limiting or memory issues",
)
)
def _validate_paths(self) -> None:
"""Validate path-related settings."""
if self.config is None:
return
# Check exclude patterns for common issues
exclude_patterns = self.config.parsing.exclude_patterns
if not any("node_modules" in p for p in exclude_patterns):
self.issues.append(
ValidationIssue(
level="warning",
category="Parsing",
message="node_modules not in exclude_patterns",
suggestion="Add 'node_modules/**' to avoid indexing dependencies",
)
)
if not any(".git" in p for p in exclude_patterns):
self.issues.append(
ValidationIssue(
level="warning",
category="Parsing",
message=".git not in exclude_patterns",
suggestion="Add '.git/**' to avoid indexing git objects",
)
)
def _validate_performance_settings(self) -> None:
"""Validate performance-related settings."""
if self.config is None:
return
chunking = self.config.chunking
# Check parallel workers
cpu_count = os.cpu_count() or 4
if chunking.parallel_workers > cpu_count * 2:
self.issues.append(
ValidationIssue(
level="warning",
category="Performance",
message=f"parallel_workers ({chunking.parallel_workers}) > 2x CPU count ({cpu_count})",
suggestion=f"Consider reducing to {cpu_count} for optimal performance",
)
)
# Check chunk sizes
if chunking.max_chunk_tokens > 1024:
self.issues.append(
ValidationIssue(
level="warning",
category="Performance",
message=f"max_chunk_tokens is {chunking.max_chunk_tokens}",
suggestion="Large chunks may reduce search quality. Consider 512-1024 tokens.",
)
)
# Check cache settings
if not self.config.embedding_cache.enabled:
self.issues.append(
ValidationIssue(
level="warning",
category="Performance",
message="Embedding cache is disabled",
suggestion="Enable caching for faster repeated operations",
)
)
if not self.config.llm_cache.enabled:
self.issues.append(
ValidationIssue(
level="warning",
category="Performance",
message="LLM cache is disabled",
suggestion="Enable caching to reduce API costs and latency",
)
)