"""Integration tests for analysis handlers: search, entity, metadata, and deep research.
This test module validates the analysis handler pipelines that were not covered
by test_integration_pipeline.py:
- search_wiki (full-text search across wiki pages and entities)
- fuzzy_search (Levenshtein-based name matching)
- explain_entity (composite: glossary + call graph + API docs)
- impact_analysis (blast radius via reverse call graph)
- get_file_context (imports, callers, related files)
- get_complexity_metrics (cyclomatic complexity via tree-sitter)
- deep_research (multi-step reasoning pipeline)
All tests use real VectorStore (LanceDB) with content-aware embeddings and
mock LLM providers to avoid external dependencies.
"""
import hashlib
import json
import math
import time
from contextlib import ExitStack
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from local_deepwiki.config import ChunkingConfig, Config, ParsingConfig, WikiConfig
from local_deepwiki.core.indexer import RepositoryIndexer
from local_deepwiki.models import (
IndexStatus,
WikiPage,
)
from local_deepwiki.providers.base import EmbeddingProvider
# =============================================================================
# Content-Aware Embedding Provider (self-contained per project convention)
# =============================================================================
class ContentAwareEmbeddingProvider(EmbeddingProvider):
    """Deterministic mock embedding provider whose vectors vary with input text.

    Each input string is SHA-256 hashed; the digest bytes are tiled out to the
    target dimension and L2-normalized, so distinct texts map to distinct unit
    vectors and vector search produces meaningful relevance rankings.
    """

    def __init__(self, dimension: int = 384):
        self._dimension = dimension

    @property
    def name(self) -> str:
        return "mock:content-aware"

    @property
    def dimension(self) -> int:
        return self._dimension

    async def embed(self, texts: list[str]) -> list[list[float]]:
        """Return one hash-derived, L2-normalized vector per input text."""
        vectors: list[list[float]] = []
        for text in texts:
            digest = hashlib.sha256(text.encode()).digest()
            # Tile the 32-byte digest to cover the full dimension, scale bytes to [0, 1].
            repeats = (self._dimension // len(digest)) + 1
            raw = [b / 255.0 for b in (digest * repeats)[: self._dimension]]
            magnitude = math.sqrt(sum(c * c for c in raw))
            # Guard against a zero vector, although SHA-256 output is never all zeros.
            vectors.append([c / magnitude for c in raw] if magnitude > 0 else raw)
        return vectors
# =============================================================================
# Fixtures
# =============================================================================
@pytest.fixture
def sample_python_repo(tmp_path: Path) -> Path:
    """Create a sample Python repo with classes, functions, imports, and tests.

    Richer than the basic fixture to exercise analysis handler features:
    - Cross-file imports (main.py imports from utils.py)
    - Inheritance (UserModel extends BaseModel)
    - Functions with docstrings, type hints, parameters (API docs, complexity)
    - Test file referencing entities (test examples extraction)

    Args:
        tmp_path: pytest-managed temporary directory to create the repo in.

    Returns:
        Path to the root of the generated sample repository.
    """
    repo_path = tmp_path / "sample_repo"
    repo_path.mkdir()
    src_dir = repo_path / "src"
    src_dir.mkdir()

    # Main module with classes and a cross-file import (exercises call graph
    # and file-context "imports"/"related_files" extraction).
    (src_dir / "main.py").write_text(
        '"""Main application module."""\n'
        "\n"
        "from src.utils import validate_config\n"
        "\n"
        "\n"
        "class Application:\n"
        '    """Main application class.\n'
        "\n"
        "    Handles initialization and lifecycle of the application.\n"
        '    """\n'
        "\n"
        "    def __init__(self, config: dict):\n"
        '        """Initialize the application.\n'
        "\n"
        "        Args:\n"
        "            config: Configuration dictionary.\n"
        '        """\n'
        "        self.config = config\n"
        "        self._running = False\n"
        "\n"
        "    def start(self) -> None:\n"
        '        """Start the application."""\n'
        "        if validate_config(self.config):\n"
        "            self._running = True\n"
        "\n"
        "    def stop(self) -> None:\n"
        '        """Stop the application."""\n'
        "        self._running = False\n"
        "\n"
        "    @property\n"
        "    def is_running(self) -> bool:\n"
        '        """Check if application is running."""\n'
        "        return self._running\n"
        "\n"
        "\n"
        "def create_app(config: dict) -> Application:\n"
        '    """Factory function to create an Application instance.\n'
        "\n"
        "    Args:\n"
        "        config: Application configuration.\n"
        "\n"
        "    Returns:\n"
        "        Configured Application instance.\n"
        '    """\n'
        "    return Application(config)\n"
    )

    # Utils module with helper functions (targets for explain_entity and
    # impact_analysis: validate_config is called from main.py).
    (src_dir / "utils.py").write_text(
        '"""Utility functions for the application."""\n'
        "\n"
        "from typing import Any\n"
        "\n"
        "\n"
        "def validate_config(config: dict) -> bool:\n"
        '    """Validate the configuration dictionary.\n'
        "\n"
        "    Args:\n"
        "        config: Configuration to validate.\n"
        "\n"
        "    Returns:\n"
        "        True if valid, False otherwise.\n"
        '    """\n'
        '    required_keys = ["name", "version"]\n'
        "    return all(key in config for key in required_keys)\n"
        "\n"
        "\n"
        "def format_output(data: Any) -> str:\n"
        '    """Format data for output.\n'
        "\n"
        "    Args:\n"
        "        data: Data to format.\n"
        "\n"
        "    Returns:\n"
        "        Formatted string representation.\n"
        '    """\n'
        "    if isinstance(data, dict):\n"
        '        return "\\n".join(f"{k}: {v}" for k, v in data.items())\n'
        "    return str(data)\n"
    )

    # Models module with inheritance (UserModel extends BaseModel).
    (src_dir / "models.py").write_text(
        '"""Data models for the application."""\n'
        "\n"
        "\n"
        "class BaseModel:\n"
        '    """Base model with common fields."""\n'
        "\n"
        "    def __init__(self, id: int, name: str):\n"
        "        self.id = id\n"
        "        self.name = name\n"
        "\n"
        "    def to_dict(self) -> dict:\n"
        '        """Convert to dictionary."""\n'
        '        return {"id": self.id, "name": self.name}\n'
        "\n"
        "\n"
        "class UserModel(BaseModel):\n"
        '    """User model extending BaseModel."""\n'
        "\n"
        "    def __init__(self, id: int, name: str, email: str):\n"
        "        super().__init__(id, name)\n"
        "        self.email = email\n"
        "\n"
        "    def to_dict(self) -> dict:\n"
        '        """Convert to dictionary with email."""\n'
        "        base = super().to_dict()\n"
        '        return {**base, "email": self.email}\n'
    )

    # Test file referencing Application/create_app (test examples extraction).
    tests_dir = repo_path / "tests"
    tests_dir.mkdir()
    (tests_dir / "test_main.py").write_text(
        '"""Tests for main module."""\n'
        "\n"
        "import pytest\n"
        "from src.main import Application, create_app\n"
        "\n"
        "\n"
        "def test_application_init():\n"
        '    """Test Application initialization."""\n'
        '    app = Application({"name": "test"})\n'
        '    assert app.config == {"name": "test"}\n'
        "    assert not app.is_running\n"
        "\n"
        "\n"
        "def test_create_app():\n"
        '    """Test create_app factory function."""\n'
        '    app = create_app({"name": "test"})\n'
        "    assert isinstance(app, Application)\n"
    )
    return repo_path
@pytest.fixture
def test_config() -> Config:
    """Build a Config tuned for integration tests (small batches, Python only)."""
    overrides = {
        "chunking": ChunkingConfig().model_copy(
            update={"batch_size": 10, "max_chunk_size": 2000}
        ),
        "parsing": ParsingConfig().model_copy(update={"languages": ["python"]}),
        "wiki": WikiConfig().model_copy(update={"max_concurrent_llm": 2}),
    }
    return Config().model_copy(update=overrides)
@pytest.fixture
async def indexed_repo(sample_python_repo: Path, tmp_path: Path, test_config: Config):
    """Index sample_python_repo into a real VectorStore with wiki artifacts.

    Generates:
    - Real LanceDB vector store with code chunks
    - search.json with pages and entity entries
    - toc.json with page listing

    Returns:
        Tuple of (repo_path, wiki_path, vector_store, index_status, config).
    """
    from local_deepwiki.core.vectorstore import VectorStore
    from local_deepwiki.generators.search import generate_full_search_index

    # Content-aware embeddings keep vector search deterministic and offline.
    embedding_provider = ContentAwareEmbeddingProvider()
    db_path = tmp_path / "vectors.lance"
    vector_store = VectorStore(db_path, embedding_provider)

    # Run the real indexer, but swap in our store before indexing so chunks
    # land in the test-controlled LanceDB database.
    indexer = RepositoryIndexer(sample_python_repo, test_config)
    indexer.vector_store = vector_store
    index_status = await indexer.index(full_rebuild=True)
    wiki_path = indexer.wiki_path

    # Create minimal wiki pages for search.json generation
    now = time.time()
    pages = [
        WikiPage(
            path="index.md",
            title="Sample Repo",
            content="# Sample Repo\n\nA sample Python application with Application class.",
            generated_at=now,
        ),
        WikiPage(
            path="files/src/main.md",
            title="main.py",
            content="# main.py\n\nMain module with Application class and create_app function.",
            generated_at=now,
        ),
        WikiPage(
            path="files/src/utils.md",
            title="utils.py",
            content="# utils.py\n\nUtility module with validate_config and format_output.",
            generated_at=now,
        ),
        WikiPage(
            path="files/src/models.md",
            title="models.py",
            content="# models.py\n\nData models with BaseModel and UserModel classes.",
            generated_at=now,
        ),
    ]

    # Write wiki markdown files so handlers that read pages from disk find them.
    for page in pages:
        page_path = wiki_path / page.path
        page_path.parent.mkdir(parents=True, exist_ok=True)
        page_path.write_text(page.content)

    # Generate full search index (pages + entities from real vectorstore)
    search_index = await generate_full_search_index(pages, index_status, vector_store)
    (wiki_path / "search.json").write_text(json.dumps(search_index, indent=2))

    # Generate toc.json; map "files/src/main.md" back to "src/main.py", and
    # give non-file pages (e.g. index.md) an empty source_file.
    toc_data = {
        "pages": [
            {
                "title": p.title,
                "path": p.path,
                "source_file": p.path.replace("files/", "").replace(".md", ".py")
                if p.path.startswith("files/")
                else "",
            }
            for p in pages
        ]
    }
    (wiki_path / "toc.json").write_text(json.dumps(toc_data, indent=2))
    return (sample_python_repo, wiki_path, vector_store, index_status, test_config)
# =============================================================================
# Plumbing Helpers
# =============================================================================
def _make_permissive_access_controller():
"""Create a permissive mock access controller."""
mock_ac = MagicMock()
mock_ac.require_permission = MagicMock()
mock_ac.get_current_subject.return_value = None
return mock_ac
def _patch_analysis_plumbing(
    module_path: str,
    index_status: IndexStatus,
    wiki_path: Path,
    config: Config,
    vector_store: Any = None,
) -> ExitStack:
    """Patch shared handler plumbing for an analysis handler module.

    Analysis handler modules import _load_index_status, _create_vector_store,
    and get_access_controller from _shared, so each must be patched at its
    import location inside the target module.

    Args:
        module_path: Dotted module path (e.g. "local_deepwiki.handlers.analysis_search").
        index_status: IndexStatus to return from _load_index_status.
        wiki_path: Wiki path to return from _load_index_status.
        config: Config to return from _load_index_status.
        vector_store: Optional VectorStore to return from _create_vector_store.

    Returns:
        ExitStack with active patches.
    """
    patches = ExitStack()

    # Index loading returns the fixture-provided triple instead of hitting disk.
    patches.enter_context(
        patch(
            f"{module_path}._load_index_status",
            new_callable=AsyncMock,
            return_value=(index_status, wiki_path, config),
        )
    )

    # Hand back the caller's (real or mock) vector store when one is supplied.
    if vector_store is not None:
        patches.enter_context(
            patch(f"{module_path}._create_vector_store", return_value=vector_store)
        )

    # RBAC always allows.
    patches.enter_context(
        patch(
            f"{module_path}.get_access_controller",
            return_value=_make_permissive_access_controller(),
        )
    )

    # Query validation becomes a no-op where the module imports it; patch
    # raises AttributeError on entry for modules that do not.
    try:
        patches.enter_context(patch(f"{module_path}.validate_query_parameters"))
    except AttributeError:
        pass
    return patches
# =============================================================================
# Search Wiki Integration Tests
# =============================================================================
class TestSearchWikiIntegration:
    """Tests for handle_search_wiki with real search.json from indexed repo."""

    async def test_search_wiki_finds_entity_by_name(self, indexed_repo):
        """Search for 'Application' and verify entity match with correct type and file."""
        from local_deepwiki.handlers.analysis_search import handle_search_wiki

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_search",
            index_status,
            wiki_path,
            config,
        ):
            result = await handle_search_wiki(
                {"repo_path": str(repo_path), "query": "Application", "limit": 10}
            )

        assert len(result) == 1
        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        assert payload["total_matches"] > 0

        # The Application class should surface among the entity matches.
        entities = [m for m in payload["matches"] if m["type"] == "entity"]
        assert len(entities) > 0
        app_match = None
        for candidate in entities:
            if "Application" in (candidate.get("name") or ""):
                app_match = candidate
                break
        assert app_match is not None
        assert app_match["entity_type"] == "class"
        assert "main.py" in app_match["file"]

    async def test_search_wiki_finds_page_by_title(self, indexed_repo):
        """Search for a page title and verify page match with path."""
        from local_deepwiki.handlers.analysis_search import handle_search_wiki

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_search",
            index_status,
            wiki_path,
            config,
        ):
            result = await handle_search_wiki(
                {"repo_path": str(repo_path), "query": "utils", "limit": 10}
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        assert payload["total_matches"] > 0
        # Either the utils.py page or a utils-related entity should match.
        labels = [m.get("title") or m.get("name", "") for m in payload["matches"]]
        assert any("utils" in label.lower() for label in labels)
# =============================================================================
# Fuzzy Search Integration Tests
# =============================================================================
class TestFuzzySearchIntegration:
    """Tests for handle_fuzzy_search with real VectorStore chunks."""

    async def test_fuzzy_search_finds_similar_name(self, indexed_repo):
        """Search for 'Applicaton' (typo) and verify 'Application' appears."""
        from local_deepwiki.handlers.analysis_search import handle_fuzzy_search

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_search",
            index_status,
            wiki_path,
            config,
            vector_store,
        ):
            result = await handle_fuzzy_search(
                {
                    "repo_path": str(repo_path),
                    "query": "Applicaton",
                    "threshold": 0.5,
                    "limit": 10,
                }
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        assert payload["total_matches"] > 0
        # The misspelled query must still resolve to the real class name.
        assert "Application" in [m["name"] for m in payload["matches"]]
        # Every reported match carries a positive similarity score.
        assert all(m["score"] > 0 for m in payload["matches"])

    async def test_fuzzy_search_includes_file_suggestions(self, indexed_repo):
        """Search for 'main' and verify file_suggestions includes main.py."""
        from local_deepwiki.handlers.analysis_search import handle_fuzzy_search

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_search",
            index_status,
            wiki_path,
            config,
            vector_store,
        ):
            result = await handle_fuzzy_search(
                {
                    "repo_path": str(repo_path),
                    "query": "main",
                    "threshold": 0.3,
                    "limit": 10,
                }
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        suggestions = payload.get("file_suggestions", [])
        assert any("main.py" in name for name in suggestions)
# =============================================================================
# Explain Entity Integration Tests
# =============================================================================
class TestExplainEntityIntegration:
    """Tests for handle_explain_entity with real search.json and source files."""

    async def test_explain_entity_class(self, indexed_repo):
        """Explain 'Application' class and verify entity_found, entity_info, call_graph, api_docs."""
        from local_deepwiki.handlers.analysis_entity import handle_explain_entity

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_entity",
            index_status,
            wiki_path,
            config,
            vector_store,
        ):
            result = await handle_explain_entity(
                {
                    "repo_path": str(repo_path),
                    "entity_name": "Application",
                    "include_call_graph": True,
                    "include_api_docs": True,
                    "include_inheritance": False,
                    "include_test_examples": False,
                }
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        assert payload["entity_found"] is True

        # Entity resolved as a class defined in src/main.py.
        info = payload["entity_info"]
        assert info["type"] == "class"
        assert "main.py" in info["file"]

        # Call graph is structurally valid even when edges are empty.
        assert "call_graph" in payload
        assert "calls" in payload["call_graph"]
        assert "called_by" in payload["call_graph"]

        # API docs enumerate the class methods.
        assert "api_docs" in payload
        api_docs = payload["api_docs"]
        assert "methods" in api_docs
        method_names = {m["name"] for m in api_docs["methods"]}
        assert "__init__" in method_names
        assert "start" in method_names

    async def test_explain_entity_function(self, indexed_repo):
        """Explain 'validate_config' function and verify api_docs with parameters."""
        from local_deepwiki.handlers.analysis_entity import handle_explain_entity

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_entity",
            index_status,
            wiki_path,
            config,
            vector_store,
        ):
            result = await handle_explain_entity(
                {
                    "repo_path": str(repo_path),
                    "entity_name": "validate_config",
                    "include_call_graph": True,
                    "include_api_docs": True,
                    "include_inheritance": False,
                    "include_test_examples": False,
                }
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        assert payload["entity_found"] is True
        assert payload["entity_info"]["type"] == "function"

        # API docs capture the function signature: config parameter, bool return.
        assert "api_docs" in payload
        api_docs = payload["api_docs"]
        assert "parameters" in api_docs
        assert "config" in [p["name"] for p in api_docs["parameters"]]
        assert api_docs["return_type"] == "bool"
# =============================================================================
# Impact Analysis Integration Tests
# =============================================================================
class TestImpactAnalysisIntegration:
    """Tests for handle_impact_analysis with real VectorStore and source files."""

    async def test_impact_analysis_on_utils(self, indexed_repo):
        """Analyze impact of src/utils.py and verify reverse_call_graph and impact_summary."""
        from local_deepwiki.handlers.analysis_entity import handle_impact_analysis

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_entity",
            index_status,
            wiki_path,
            config,
            vector_store,
        ):
            result = await handle_impact_analysis(
                {
                    "repo_path": str(repo_path),
                    "file_path": "src/utils.py",
                    "include_reverse_calls": True,
                    "include_inheritance": False,
                    "include_dependents": False,
                    "include_wiki_pages": True,
                }
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        assert payload["file_path"] == "src/utils.py"

        # Reverse call graph must be included when requested.
        assert "reverse_call_graph" in payload

        # Summary assigns one of the known risk levels.
        assert "impact_summary" in payload
        summary = payload["impact_summary"]
        assert "risk_level" in summary
        assert summary["risk_level"] in ("low", "medium", "high")

        # Wiki page impact list is present even if empty.
        assert "affected_wiki_pages" in payload
# =============================================================================
# File Context Integration Tests
# =============================================================================
class TestFileContextIntegration:
    """Tests for handle_get_file_context with real VectorStore chunks."""

    async def test_file_context_shows_imports_and_related(self, indexed_repo):
        """Get context for src/main.py and verify imports and related_files."""
        from local_deepwiki.handlers.analysis_metadata import handle_get_file_context

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        with _patch_analysis_plumbing(
            "local_deepwiki.handlers.analysis_metadata",
            index_status,
            wiki_path,
            config,
            vector_store,
        ):
            result = await handle_get_file_context(
                {"repo_path": str(repo_path), "file_path": "src/main.py"}
            )

        payload = json.loads(result[0].text)
        assert payload["status"] == "success"
        ctx = payload["context"]
        assert ctx["file_path"] == "src/main.py"
        # main.py imports from src.utils, so an imports list must exist.
        assert "imports" in ctx
        assert isinstance(ctx["imports"], list)
        # Cross-file relationships surface as related_files.
        assert "related_files" in ctx
        assert isinstance(ctx["related_files"], list)
# =============================================================================
# Complexity Metrics Integration Tests
# =============================================================================
class TestComplexityMetricsIntegration:
    """Tests for handle_get_complexity_metrics with real source files."""

    async def test_complexity_metrics_on_python_file(self, indexed_repo):
        """Analyze src/main.py and verify functions, classes, and complexity fields."""
        from local_deepwiki.handlers.analysis_metadata import (
            handle_get_complexity_metrics,
        )

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo
        # This handler reads source files directly, so only RBAC is patched.
        with patch(
            "local_deepwiki.handlers.analysis_metadata.get_access_controller",
            return_value=_make_permissive_access_controller(),
        ):
            result = await handle_get_complexity_metrics(
                {"repo_path": str(repo_path), "file_path": "src/main.py"}
            )

        report = json.loads(result[0].text)
        assert report["status"] == "success"
        assert report["file_path"] == "src/main.py"
        assert report["language"] == "python"

        # Line accounting.
        assert "lines" in report
        assert report["lines"]["total"] > 0

        # At minimum the Application class and one function are counted.
        assert "counts" in report
        assert report["counts"]["classes"] >= 1
        assert report["counts"]["functions"] >= 1

        # Cyclomatic complexity aggregates.
        assert "complexity" in report
        assert "avg_cyclomatic" in report["complexity"]
        assert "max_cyclomatic" in report["complexity"]

        # Per-function and per-class listings.
        assert "functions" in report
        assert "__init__" in [f["name"] for f in report["functions"]]
        assert "classes" in report
        assert "Application" in [c["name"] for c in report["classes"]]
# =============================================================================
# Deep Research Integration Tests
# =============================================================================
class TestDeepResearchIntegration:
    """Tests for handle_deep_research with real VectorStore and mock LLM."""

    async def test_deep_research_produces_structured_result(self, indexed_repo):
        """Run deep research and verify structured result with answer and sub_questions."""
        from local_deepwiki.handlers.research import handle_deep_research

        repo_path, wiki_path, vector_store, index_status, config = indexed_repo

        # Mock LLM that returns canned responses for each research phase,
        # routed by keywords in the prompt text.
        mock_llm = MagicMock()

        async def mock_generate(prompt: str, **kwargs) -> str:
            # Decomposition phase: return a JSON payload of sub-questions.
            prompt_lower = prompt.lower()
            if "decompose" in prompt_lower or "sub-question" in prompt_lower:
                return json.dumps(
                    {
                        "sub_questions": [
                            {
                                "question": "What is the Application class?",
                                "category": "definition",
                            },
                            {"question": "How does it start?", "category": "behavior"},
                        ]
                    }
                )
            # Gap-analysis phase: report no follow-up queries so the pipeline
            # terminates after one research round.
            if "gap" in prompt_lower or "follow-up" in prompt_lower:
                return json.dumps({"follow_up_queries": []})
            # Synthesis / default
            return (
                "The Application class manages the app lifecycle. "
                "It initializes with a config dict and provides start/stop methods."
            )

        # AsyncMock awaits the async side_effect, matching the provider contract.
        mock_llm.generate = AsyncMock(side_effect=mock_generate)

        stack = ExitStack()
        # Patch get_config to return test config with correct paths
        mock_config = MagicMock()
        mock_config.get_vector_db_path.return_value = vector_store.db_path
        mock_config.get_wiki_path.return_value = wiki_path
        mock_config.embedding = config.embedding
        mock_config.llm = config.llm
        mock_config.llm_cache = config.llm_cache
        mock_config.deep_research = config.deep_research
        mock_config.get_prompts.return_value = config.get_prompts()
        stack.enter_context(
            patch(
                "local_deepwiki.handlers.research.get_config",
                return_value=mock_config,
            )
        )
        # Patch get_embedding_provider to return our content-aware provider
        stack.enter_context(
            patch(
                "local_deepwiki.handlers.research.get_embedding_provider",
                return_value=ContentAwareEmbeddingProvider(),
            )
        )
        # Patch VectorStore in research module to return our real store
        stack.enter_context(
            patch(
                "local_deepwiki.handlers.research.VectorStore",
                return_value=vector_store,
            )
        )
        # Patch get_cached_llm_provider to return our mock
        stack.enter_context(
            patch(
                "local_deepwiki.providers.llm.get_cached_llm_provider",
                return_value=mock_llm,
            )
        )
        # Patch RBAC
        stack.enter_context(
            patch(
                "local_deepwiki.handlers.research.get_access_controller",
                return_value=_make_permissive_access_controller(),
            )
        )
        # Patch validation (no-op)
        stack.enter_context(
            patch("local_deepwiki.handlers.research.validate_deep_research_parameters")
        )

        # NOTE(review): enter_context activates each patch immediately; the
        # `with stack:` below guarantees cleanup, not activation.
        with stack:
            result = await handle_deep_research(
                {
                    "repo_path": str(repo_path),
                    "question": "How does the Application class work?",
                    "max_chunks": 20,
                }
            )

        assert len(result) == 1
        data = json.loads(result[0].text)
        # Should be a successful research result (not an error)
        # The result may have "question" + "answer" (success) or "status" key
        if "status" in data and data["status"] == "error":
            pytest.fail(f"Deep research returned error: {data}")

        # Verify structured result
        assert "answer" in data
        assert len(data["answer"]) > 0
        assert "sub_questions" in data
        assert "stats" in data
        assert data["stats"]["chunks_analyzed"] >= 0