from pathlib import Path
from typing import Any
import pytest
import chunkhound.api.cli.commands.code_mapper as code_mapper_mod
from chunkhound.code_mapper import service as code_mapper_service
from chunkhound.code_mapper.models import CodeMapperPOI
from chunkhound.core.config.config import Config


class DummyProvider:
"""Minimal provider stub exposing chunks for coverage stats."""
def __init__(self) -> None:
self._chunks = [
{"file_path": "scope/a.py", "start_line": 1, "end_line": 10},
{"file_path": "scope/b.py", "start_line": 5, "end_line": 15},
]

    def get_scope_stats(self, scope_prefix: str | None) -> tuple[int, int]:
# Include one extra indexed file that is never referenced in the fake
# deep-research results so code_mapper can emit an "unreferenced files"
# artifact.
if scope_prefix == "scope/":
return 3, 2
return 0, 0

    def get_scope_file_paths(self, scope_prefix: str | None) -> list[str]:
if scope_prefix == "scope/":
return ["scope/a.py", "scope/b.py", "scope/c.py"]
return []

    def get_all_chunks_with_metadata(self) -> list[dict[str, Any]]:
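        # Return a copy so callers cannot mutate the stub's internal chunk list.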
return list(self._chunks)


class DummyServices:
"""Services stub that only exposes a provider for coverage stats."""
def __init__(self) -> None:
self.provider = DummyProvider()


class DummyLLMManager:
"""Placeholder LLM manager for code_mapper tests."""
def __init__(self, *_: Any, **__: Any) -> None:
self._configured = True

    def is_configured(self) -> bool:
return self._configured


async def _run_code_mapper_with_stubs(
*,
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
set_combined: bool,
cli_combined: bool | None = None,
poi_concurrency_env: str | None = None,
comprehensiveness: str = "low",
) -> tuple[Path, str, list[int]]:
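    """Run the code_mapper command against a temporary repo with stubbed deps.

    Creates a small fake project under ``tmp_path``, monkeypatches the heavy
    dependencies (database services, LLM manager, overview generation, and
    deep research), invokes ``code_mapper_command``, and returns the output
    directory, the captured stdout, and the ``max_points`` values requested
    from the stubbed overview.
    """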
project_root = tmp_path / "repo"
scope_path = project_root / "scope"
scope_path.mkdir(parents=True, exist_ok=True)
(scope_path / "a.py").write_text("print('a')\n", encoding="utf-8")
(scope_path / "b.py").write_text("print('b')\n", encoding="utf-8")
(scope_path / "c.py").write_text("print('c')\n", encoding="utf-8")
# Use a minimal config; database/embedding/llm implementations are stubbed.
config = Config(
target_dir=project_root,
database={"path": project_root / ".chunkhound" / "db", "provider": "duckdb"},
embedding={
"provider": "openai",
"api_key": "test",
"model": "text-embedding-3-small",
},
llm={"provider": "openai", "api_key": "test"},
)
# Stub out database, services, embeddings, LLM, and deep research.
seen_max_points: list[int] = []

    async def fake_overview(
llm_manager: Any,
target_dir: Path,
scope_path: Path,
scope_label: str,
meta: Any | None = None,
context: str | None = None,
max_points: int = 10,
comprehensiveness: str = "medium",
out_dir: Path | None = None,
map_hyde_provider: Any | None = None,
indexing_cfg: Any | None = None,
) -> tuple[str, list[CodeMapperPOI]]:
_ = (llm_manager, target_dir, scope_path, scope_label)
_ = (meta, context, comprehensiveness, out_dir, map_hyde_provider, indexing_cfg)
# Record the requested max_points so we can assert comprehensiveness mapping.
seen_max_points.append(max_points)
overview = (
"1. **Core Flow**: High-level data flow.\n"
"2. **Error Handling**: How failures are surfaced.\n"
)
points = [
CodeMapperPOI(
mode="architectural",
text="Core Flow: High-level data flow.",
),
CodeMapperPOI(
mode="architectural",
text="Error Handling: How failures are surfaced.",
),
]
return overview, points[:max_points]

    async def fake_run_deep_research(
*,
query: str,
**__: Any,
) -> dict[str, Any]:
# Return a minimal answer and sources metadata for coverage.
# For one of the bullets, simulate an empty answer so it is skipped.
# This specifically skips the FIRST POI to regression-test that topic
# headings and content remain aligned (no zip-based mispairing).
if "Core Flow" in query:
return {
"answer": "",
"metadata": {
"sources": {
"files": [],
"chunks": [],
},
"aggregation_stats": {"files_total": 2, "chunks_total": 2},
},
}
return {
"answer": f"Section for query: {query[:40]}",
"metadata": {
"sources": {
"files": ["scope/a.py", "scope/b.py"],
"chunks": [
{"file_path": "scope/a.py", "start_line": 1, "end_line": 10},
{"file_path": "scope/b.py", "start_line": 5, "end_line": 15},
],
},
"aggregation_stats": {"files_total": 2, "chunks_total": 2},
},
}
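
    # Wire the stubs into the CLI command so no real database, LLM, or
    # deep-research calls are made.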
monkeypatch.setattr(
code_mapper_mod,
"verify_database_exists",
lambda cfg: cfg.database.get_db_path(),
)
monkeypatch.setattr(
code_mapper_mod,
"create_services",
lambda db_path, config, embedding_manager: DummyServices(),
)
monkeypatch.setattr(code_mapper_mod, "LLMManager", DummyLLMManager)
monkeypatch.setattr(
code_mapper_service,
"run_code_mapper_overview_hyde",
fake_overview,
raising=True,
)
monkeypatch.setattr(
code_mapper_service, "run_deep_research", fake_run_deep_research, raising=True
)
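
    # Configure the environment flags exercised by the individual tests.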
if set_combined:
monkeypatch.setenv("CH_CODE_MAPPER_WRITE_COMBINED", "1")
else:
monkeypatch.delenv("CH_CODE_MAPPER_WRITE_COMBINED", raising=False)
if poi_concurrency_env is None:
monkeypatch.delenv("CH_CODE_MAPPER_POI_CONCURRENCY", raising=False)
else:
monkeypatch.setenv("CH_CODE_MAPPER_POI_CONCURRENCY", poi_concurrency_env)

    class Args:
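        """Minimal stand-in for the parsed CLI argument namespace."""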
def __init__(self) -> None:
self.path = scope_path
self.verbose = False
self.overview_only = False
self.out = tmp_path / "out"
self.comprehensiveness = comprehensiveness
self.combined = cli_combined
args = Args()
await code_mapper_mod.code_mapper_command(args, config)
captured = capsys.readouterr().out
return args.out, captured, seen_max_points


@pytest.mark.asyncio
async def test_code_mapper_end_to_end_default_omits_combined_doc(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
"""Code Mapper should skip the combined doc by default and still write topics.
This is a lightweight integration test that exercises the code_mapper command
with stubbed services and deep-research behavior. It verifies that:
- the main document is not printed to stdout,
- coverage stats are computed without crashing,
- an index file plus one markdown file per point-of-interest are written, and
- the combined doc is omitted unless explicitly enabled.
"""
(
out_dir,
captured,
seen_max_points,
) = await _run_code_mapper_with_stubs(
tmp_path=tmp_path,
monkeypatch=monkeypatch,
capsys=capsys,
set_combined=False,
)
# Default behavior: avoid printing the full document to stdout.
assert "# Code Mapper for" not in captured
assert "## Coverage Summary" not in captured
combined_docs = list(out_dir.glob("*_code_mapper.md"))
assert not combined_docs, "Combined Code Mapper doc should be disabled by default"
assert "topics failed after a retry" in captured
index_files = list(out_dir.glob("*_code_mapper_index.md"))
assert index_files, "Expected a Code Mapper index file to be written"
index_content = index_files[0].read_text(encoding="utf-8")
assert "unreferenced_in_scope: 1" in index_content
assert "total_research_calls: 2" in index_content
assert "total_indexed: 3" in index_content
assert "coverage: 66.67%" in index_content
assert "scope_scope_unreferenced_files.txt" in index_content
topic_files = list(out_dir.glob("*_topic_*.md"))
# One of the deep research calls returns an empty answer and should fail after
# a retry, producing a (failed) topic entry that is omitted from the combined
# doc but still written as a topic file.
assert len(topic_files) == 2
topic_contents = [path.read_text(encoding="utf-8") for path in topic_files]
assert any(content.startswith("# Error Handling") for content in topic_contents)
assert any(content.startswith("# Core Flow (failed)") for content in topic_contents)
assert "Core Flow (failed)" in index_content
unref_files = list(out_dir.glob("*_scope_unreferenced_files.txt"))
assert unref_files, "Expected unreferenced files artifact to be written"
unref_content = unref_files[0].read_text(encoding="utf-8")
assert "scope/c.py" in unref_content
# Comprehensiveness=low should map to fewer max_points for the overview.
assert seen_max_points, "Expected fake_overview to be called"
assert seen_max_points[0] == 5


@pytest.mark.asyncio
async def test_code_mapper_invalid_poi_concurrency_env_exits_2(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
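    """An invalid CH_CODE_MAPPER_POI_CONCURRENCY value should exit with code 2."""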
with pytest.raises(SystemExit) as excinfo:
await _run_code_mapper_with_stubs(
tmp_path=tmp_path,
monkeypatch=monkeypatch,
capsys=capsys,
set_combined=False,
poi_concurrency_env="nope",
)
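    # SystemExit.code may be a message string; normalize non-int codes to 1 so
    # the assertion below remains a simple integer comparison.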
code = excinfo.value.code if isinstance(excinfo.value.code, int) else 1
assert code == 2
captured = capsys.readouterr().out
assert "CH_CODE_MAPPER_POI_CONCURRENCY" in captured


@pytest.mark.asyncio
async def test_code_mapper_end_to_end_combined_doc_requires_flag(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
"""Combined Code Mapper doc should be written only when the env flag is set."""
(
out_dir,
captured,
_seen_max_points,
) = await _run_code_mapper_with_stubs(
tmp_path=tmp_path,
monkeypatch=monkeypatch,
capsys=capsys,
set_combined=True,
)
    # Even with the combined doc enabled, the full document should not be
    # printed to stdout.
assert "# Code Mapper for" not in captured
assert "## Coverage Summary" not in captured
combined_docs = list(out_dir.glob("*_code_mapper.md"))
assert combined_docs, "Expected a combined Code Mapper document to be written"
combined_content = combined_docs[0].read_text(encoding="utf-8")
assert "agent_doc_metadata:" in combined_content
assert "code_mapper_comprehensiveness: low" in combined_content
assert "# Code Mapper for" in combined_content
assert "## Coverage Summary" in combined_content
# Skipping the first POI should renumber topics contiguously and keep
# headings aligned with the surviving result.
assert "## 1. Error Handling" in combined_content


@pytest.mark.asyncio
async def test_code_mapper_end_to_end_combined_cli_overrides_env_off(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
"""--no-combined should disable combined output even if env is set."""
(
out_dir,
_captured,
_seen_max_points,
) = await _run_code_mapper_with_stubs(
tmp_path=tmp_path,
monkeypatch=monkeypatch,
capsys=capsys,
set_combined=True,
cli_combined=False,
)
combined_docs = list(out_dir.glob("*_code_mapper.md"))
assert not combined_docs


@pytest.mark.asyncio
async def test_code_mapper_end_to_end_combined_cli_overrides_env_on(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
"""--combined should enable combined output even if env is unset."""
(
out_dir,
_captured,
_seen_max_points,
) = await _run_code_mapper_with_stubs(
tmp_path=tmp_path,
monkeypatch=monkeypatch,
capsys=capsys,
set_combined=False,
cli_combined=True,
)
combined_docs = list(out_dir.glob("*_code_mapper.md"))
assert combined_docs


@pytest.mark.asyncio
async def test_code_mapper_ultra_requests_max_points(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
"""Comprehensiveness=ultra should request more points-of-interest."""
(
out_dir,
_captured,
seen_max_points,
) = await _run_code_mapper_with_stubs(
tmp_path=tmp_path,
monkeypatch=monkeypatch,
capsys=capsys,
set_combined=False,
comprehensiveness="ultra",
)
assert out_dir.exists()
assert seen_max_points, "Expected overview to be called"
assert seen_max_points[0] == 20