ChunkHound

Overview Schema Related Servers Score Discussions

test_query_expander.py•9.76 KiB

"""Unit tests for QueryExpander in deep research. Tests query building strategies and LLM-based query expansion without requiring real API calls. """ import pytest from chunkhound.llm_manager import LLMManager from chunkhound.services.research.models import ResearchContext from chunkhound.services.research.query_expander import QueryExpander from tests.fixtures.fake_providers import FakeLLMProvider @pytest.fixture def fake_llm_provider(): """Create fake LLM provider with scripted responses.""" return FakeLLMProvider( responses={ # Match any query expansion request (searches for words in the prompt) "query": '{"queries": ["authentication system implementation", "user auth flow design"]}', "search": '{"queries": ["code search variation 1", "code search variation 2"]}', "authentication": '{"queries": ["auth system architecture", "authentication workflow"]}', "fallback": "default response", } ) @pytest.fixture def llm_manager(fake_llm_provider, monkeypatch): """Create LLM manager with fake provider. Monkeypatch the provider creation to return our fake provider instead of initializing real providers. """ # Mock the _create_provider method to return our fake provider def mock_create_provider(self, config): return fake_llm_provider monkeypatch.setattr(LLMManager, "_create_provider", mock_create_provider) # Create manager with minimal config utility_config = {"provider": "fake", "model": "fake-gpt"} synthesis_config = {"provider": "fake", "model": "fake-gpt"} manager = LLMManager(utility_config, synthesis_config) return manager @pytest.fixture def query_expander(llm_manager): """Create query expander with fake LLM.""" return QueryExpander(llm_manager) class TestBuildSearchQuery: """Test query building strategies for semantic search.""" def test_root_node_uses_query_directly(self, query_expander): """Root nodes should use the original query without modification.""" query = "How does authentication work?" context = ResearchContext(root_query=query, ancestors=[]) result = query_expander.build_search_query(query, context) assert result == query, "Root node should return query unchanged" def test_child_node_with_single_ancestor(self, query_expander): """Child nodes should combine query with minimal parent context.""" query = "token validation logic" root = "How does authentication work?" context = ResearchContext(root_query=root, ancestors=["authentication flow"]) result = query_expander.build_search_query(query, context) assert query in result, "Result should contain original query" assert "authentication flow" in result, "Result should contain ancestor context" assert result.startswith(query), "Query should come first (position bias)" assert " | Context: " in result, "Should have clear separator" def test_child_node_with_multiple_ancestors(self, query_expander): """Child nodes should use last 2 ancestors for context.""" query = "session storage implementation" root = "How does authentication work?" context = ResearchContext( root_query=root, ancestors=[ "authentication flow", "token validation logic", "session management", ], ) result = query_expander.build_search_query(query, context) assert query in result, "Result should contain original query" assert result.startswith(query), "Query should come first (position bias)" # Should use last 2 ancestors only assert "token validation logic" in result assert "session management" in result # Should NOT include first ancestor (too far back) assert "authentication flow" not in result def test_query_first_position_for_embedding_bias(self, query_expander): """Query must come before context for embedding model position bias.""" query = "database query" context = ResearchContext( root_query="root", ancestors=["parent1", "parent2"] ) result = query_expander.build_search_query(query, context) # Verify query is at the very beginning assert result.index(query) == 0, "Query must be first for position bias" assert result.startswith(query), "Query must start the search string" class TestExpandQueryWithLLM: """Test LLM-based query expansion.""" @pytest.mark.asyncio async def test_successful_query_expansion(self, query_expander): """Should expand query into multiple variations using LLM.""" query = "authentication implementation" context = ResearchContext(root_query=query, ancestors=[]) # FakeLLMProvider will return scripted response for "expand" pattern result = await query_expander.expand_query_with_llm(query, context) assert len(result) == 3, "Should return original + 2 expanded queries" assert result[0] == query, "Original query should be first" assert len(result[1]) > 0, "Expanded queries should not be empty" assert len(result[2]) > 0, "Expanded queries should not be empty" @pytest.mark.asyncio async def test_expansion_with_context(self, query_expander): """Should include context in expansion prompt.""" query = "token storage" context = ResearchContext( root_query="authentication system", ancestors=["auth flow", "token validation"], ) result = await query_expander.expand_query_with_llm(query, context) # Should still work with context assert len(result) >= 1, "Should at least return original query" assert result[0] == query, "Original query should be first" @pytest.mark.asyncio async def test_expansion_failure_returns_original(self, monkeypatch): """Should gracefully fallback to original query on LLM failure.""" # Create query expander with failing LLM failing_provider = FakeLLMProvider(responses={}) def mock_create_provider(self, config): return failing_provider monkeypatch.setattr(LLMManager, "_create_provider", mock_create_provider) utility_config = {"provider": "fake", "model": "fake-gpt"} synthesis_config = {"provider": "fake", "model": "fake-gpt"} failing_manager = LLMManager(utility_config, synthesis_config) failing_expander = QueryExpander(failing_manager) query = "test query" context = ResearchContext(root_query=query, ancestors=[]) result = await failing_expander.expand_query_with_llm(query, context) assert result == [query], "Should return original query on failure" @pytest.mark.asyncio async def test_empty_expanded_queries_returns_original(self, monkeypatch): """Should handle empty expansion results gracefully.""" # Create provider that returns empty queries empty_provider = FakeLLMProvider(responses={"expand": '{"queries": []}'}) def mock_create_provider(self, config): return empty_provider monkeypatch.setattr(LLMManager, "_create_provider", mock_create_provider) utility_config = {"provider": "fake", "model": "fake-gpt"} synthesis_config = {"provider": "fake", "model": "fake-gpt"} empty_manager = LLMManager(utility_config, synthesis_config) empty_expander = QueryExpander(empty_manager) query = "test query" context = ResearchContext(root_query=query, ancestors=[]) result = await empty_expander.expand_query_with_llm(query, context) assert result == [query], "Should return original query when expansion is empty" @pytest.mark.asyncio async def test_original_query_always_first(self, query_expander): """Original query must always be first for position bias.""" query = "original query" context = ResearchContext(root_query=query, ancestors=[]) result = await query_expander.expand_query_with_llm(query, context) assert result[0] == query, "Original query MUST be first for position bias" class TestEdgeCases: """Test edge cases and boundary conditions.""" def test_empty_ancestors_list(self, query_expander): """Empty ancestors should work like root node.""" query = "test query" context = ResearchContext(root_query=query, ancestors=[]) result = query_expander.build_search_query(query, context) assert result == query, "Empty ancestors should return query unchanged" def test_single_ancestor_uses_it(self, query_expander): """Single ancestor should be included in context.""" query = "child query" context = ResearchContext(root_query="root", ancestors=["parent"]) result = query_expander.build_search_query(query, context) assert "parent" in result, "Single ancestor should be in context" def test_whitespace_handling(self, query_expander): """Should handle queries with extra whitespace.""" query = " query with spaces " context = ResearchContext(root_query=query.strip(), ancestors=[]) result = query_expander.build_search_query(query, context) assert query in result, "Should preserve query including whitespace" @pytest.mark.asyncio async def test_special_characters_in_query(self, query_expander): """Should handle special characters in queries.""" query = "auth && validation || tokens" context = ResearchContext(root_query=query, ancestors=[]) result = await query_expander.expand_query_with_llm(query, context) assert len(result) >= 1, "Should handle special characters" assert result[0] == query, "Original query should be preserved"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ofriw/chunkhound'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_query_expander.py•9.76 KiB