#!/usr/bin/env python3
"""Unit tests for Phase 3: Progressive Search components.
Tests the following components:
1. SearchTier enum and tier configuration
2. SufficiencyEvaluator logic and early stopping
3. ProgressiveSearchEngine tier coordination
4. Result aggregation and metadata tracking
"""
from __future__ import annotations
import time
from unittest.mock import AsyncMock, patch
import pytest
from session_buddy.search.progressive_search import (
ProgressiveSearchEngine,
SearchTier,
SufficiencyConfig,
SufficiencyEvaluator,
TierSearchResult,
ProgressiveSearchResult,
)
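# Tier configuration the tests below assume (inferred from the assertions in
# TestSearchTier; the authoritative values live in
# session_buddy.search.progressive_search):
#
#   Tier            min_score   max_results
#   CATEGORIES        0.90          10
#   INSIGHTS          0.75          15
#   REFLECTIONS       0.70          20
#   CONVERSATIONS     0.60          30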
class TestSearchTier:
"""Test SearchTier enum configuration and methods."""
def test_tier_min_scores(self):
"""Test that each tier has appropriate minimum scores."""
assert SearchTier.get_min_score(SearchTier.CATEGORIES) == 0.9
assert SearchTier.get_min_score(SearchTier.INSIGHTS) == 0.75
assert SearchTier.get_min_score(SearchTier.REFLECTIONS) == 0.7
assert SearchTier.get_min_score(SearchTier.CONVERSATIONS) == 0.6
def test_tier_max_results(self):
"""Test that each tier has appropriate result limits."""
assert SearchTier.get_max_results(SearchTier.CATEGORIES) == 10
assert SearchTier.get_max_results(SearchTier.INSIGHTS) == 15
assert SearchTier.get_max_results(SearchTier.REFLECTIONS) == 20
assert SearchTier.get_max_results(SearchTier.CONVERSATIONS) == 30
def test_tier_names(self):
"""Test that tier names are human-readable."""
assert "High-quality" in SearchTier.get_tier_name(SearchTier.CATEGORIES)
assert "Learned" in SearchTier.get_tier_name(SearchTier.INSIGHTS)
assert "reflections" in SearchTier.get_tier_name(SearchTier.REFLECTIONS).lower()
assert "conversations" in SearchTier.get_tier_name(SearchTier.CONVERSATIONS).lower()
def test_tier_ordering(self):
"""Test that tiers are ordered from fastest to slowest."""
# CATEGORIES should have highest min_score (most selective)
assert SearchTier.get_min_score(SearchTier.CATEGORIES) > SearchTier.get_min_score(SearchTier.INSIGHTS)
assert SearchTier.get_min_score(SearchTier.INSIGHTS) > SearchTier.get_min_score(SearchTier.REFLECTIONS)
assert SearchTier.get_min_score(SearchTier.REFLECTIONS) > SearchTier.get_min_score(SearchTier.CONVERSATIONS)
class TestSufficiencyEvaluator:
"""Test sufficiency evaluation logic for early stopping."""
@pytest.fixture
def evaluator(self):
"""Create a SufficiencyEvaluator instance with default config."""
return SufficiencyEvaluator()
def test_empty_results(self, evaluator):
"""Test that empty results are not sufficient."""
is_sufficient, reason = evaluator.is_sufficient([], SearchTier.CATEGORIES)
assert not is_sufficient
assert "No results found" in reason
def test_perfect_matches_trigger_early_stop(self, evaluator):
"""Test that 3+ perfect matches (score >=0.95) trigger early stop."""
results = [
{"score": 0.96, "content": "result1"},
{"score": 0.97, "content": "result2"},
{"score": 0.98, "content": "result3"},
]
is_sufficient, reason = evaluator.is_sufficient(results, SearchTier.CATEGORIES)
assert is_sufficient
assert "perfect matches" in reason.lower()
assert "3" in reason
def test_high_quality_results_trigger_early_stop(self, evaluator):
"""Test that high-quality results (avg >=0.85, 3+ results) trigger early stop."""
results = [
{"score": 0.90, "content": "result1"},
{"score": 0.85, "content": "result2"},
{"score": 0.86, "content": "result3"},
{"score": 0.84, "content": "result4"},
{"score": 0.87, "content": "result5"},
]
is_sufficient, reason = evaluator.is_sufficient(results, SearchTier.CATEGORIES)
assert is_sufficient
assert "high-quality" in reason.lower()
        # The average of these scores is 0.864, so the reason should mention 0.86
assert "0.86" in reason
def test_minimum_score_threshold(self, evaluator):
"""Test that results meeting tier minimum are sufficient."""
# For INSIGHTS tier (min_score: 0.75)
results = [
{"score": 0.80, "content": "result1"},
{"score": 0.76, "content": "result2"},
{"score": 0.78, "content": "result3"},
{"score": 0.75, "content": "result4"},
{"score": 0.77, "content": "result5"},
]
is_sufficient, reason = evaluator.is_sufficient(results, SearchTier.INSIGHTS)
assert is_sufficient
assert "0.75" in reason or "75%" in reason
def test_below_threshold_not_sufficient(self, evaluator):
"""Test that results below minimum threshold are not sufficient."""
# For CATEGORIES tier (min_score: 0.9)
results = [
{"score": 0.85, "content": "result1"},
{"score": 0.87, "content": "result2"},
{"score": 0.89, "content": "result3"},
]
is_sufficient, reason = evaluator.is_sufficient(results, SearchTier.CATEGORIES)
assert not is_sufficient
assert "better results" in reason.lower() or "avg score" in reason.lower()
def test_calculate_sufficiency_score(self, evaluator):
"""Test sufficiency score calculation (0.0-1.0)."""
        # Five high-quality results; a larger result count raises the quantity component
high_quality = [
{"score": 0.90, "content": "result1"},
{"score": 0.85, "content": "result2"},
{"score": 0.88, "content": "result3"},
{"score": 0.92, "content": "result4"},
{"score": 0.87, "content": "result5"},
]
score = evaluator.calculate_sufficiency_score(high_quality)
# With 5 results: quantity_score = 5/20 = 0.25, quality_score = 0.884
# Weighted: 0.884*0.7 + 0.25*0.3 = 0.6188 + 0.075 = 0.694
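        # A sketch of the formula this expectation assumes (the 0.7/0.3 weights
        # and the 20-result quantity cap come from the comment above and are not
        # verified against the implementation):
        #   quality_score  = mean(r["score"] for r in results)
        #   quantity_score = min(len(results) / 20, 1.0)
        #   sufficiency    = 0.7 * quality_score + 0.3 * quantity_score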
assert 0.6 <= score <= 1.0 # Quality-weighted should be reasonably high
# Low-quality results
low_quality = [
{"score": 0.60, "content": "result1"},
{"score": 0.65, "content": "result2"},
]
score = evaluator.calculate_sufficiency_score(low_quality)
assert 0.0 <= score < 0.6 # Should be lower
# Empty results
score = evaluator.calculate_sufficiency_score([])
assert score == 0.0
class TestProgressiveSearchEngine:
"""Test ProgressiveSearchEngine coordination and tier searching."""
@pytest.fixture
def engine(self):
"""Create a ProgressiveSearchEngine instance."""
return ProgressiveSearchEngine()
@pytest.fixture
def mock_db(self):
"""Create a mock database adapter."""
db = AsyncMock()
db.search_reflections = AsyncMock(return_value=[])
db.search_conversations = AsyncMock(return_value=[])
return db
def test_engine_initialization(self, engine):
"""Test that engine initializes with correct configuration."""
assert engine.evaluator is not None
assert isinstance(engine.config, SufficiencyConfig)
assert engine.config.max_tiers == 4
@pytest.mark.asyncio
async def test_search_progressive_with_one_tier(self, engine, mock_db):
"""Test progressive search limited to one tier."""
# Setup mock results
mock_db.search_reflections.return_value = [
{"content": "reflection1", "score": 0.92, "project": "test"},
{"content": "reflection2", "score": 0.91, "project": "test"},
]
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query="test query",
project="test",
max_tiers=1, # Only search CATEGORIES tier
enable_early_stop=False, # Don't early stop
)
assert result.total_results == 2
assert len(result.tiers_searched) == 1
assert result.tiers_searched[0] == SearchTier.CATEGORIES
assert not result.early_stop
assert result.search_complete
@pytest.mark.asyncio
async def test_search_progressive_with_early_stop(self, engine, mock_db):
"""Test that early stopping works when sufficient results found."""
# Setup mock results with high scores (must include project field for filtering)
async def mock_search_reflections(*args, **kwargs):
return [
{"content": "reflection1", "score": 0.96, "project": "test"},
{"content": "reflection2", "score": 0.97, "project": "test"},
{"content": "reflection3", "score": 0.98, "project": "test"},
]
async def mock_search_conversations(*args, **kwargs):
return []
mock_db.search_reflections = mock_search_reflections
mock_db.search_conversations = mock_search_conversations
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query="test query",
project="test",
max_tiers=4,
enable_early_stop=True, # Enable early stopping
)
# Should stop after first tier due to perfect matches
assert result.early_stop
assert len(result.tiers_searched) == 1
assert result.total_results == 3
assert "perfect matches" in result.metadata.get("early_stop_reason", "").lower()
@pytest.mark.asyncio
async def test_search_progressive_all_tiers(self, engine, mock_db):
"""Test searching all tiers when early stopping disabled."""
# Setup mock results with moderate scores (no early stop)
mock_db.search_reflections.return_value = [
{"content": "reflection", "score": 0.80},
]
mock_db.search_conversations.return_value = [
{"content": "conversation", "score": 0.65},
]
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query="test query",
project="test",
max_tiers=4,
enable_early_stop=False, # Disable early stopping
)
# Should search all 4 tiers
assert len(result.tiers_searched) == 4
assert not result.early_stop
assert result.search_complete
@pytest.mark.asyncio
async def test_max_tiers_validation(self, engine):
"""Test that max_tiers parameter is validated."""
with pytest.raises(ValueError, match="max_tiers must be 1-4"):
await engine.search_progressive(
query="test",
max_tiers=5, # Invalid: >4
)
with pytest.raises(ValueError, match="max_tiers must be 1-4"):
await engine.search_progressive(
query="test",
max_tiers=0, # Invalid: <1
)
@pytest.mark.asyncio
async def test_tier_result_metadata(self, engine, mock_db):
"""Test that tier results include correct metadata."""
        # Set up mock results - both scores must meet the CATEGORIES minimum score of 0.9
async def mock_search_reflections(*args, **kwargs):
return [
{"content": "result1", "score": 0.92},
{"content": "result2", "score": 0.91},
]
mock_db.search_reflections = mock_search_reflections
mock_db.search_conversations = AsyncMock(return_value=[])
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query="test query",
max_tiers=1,
enable_early_stop=False,
)
# Check tier results
assert len(result.tier_results) == 1
tier_result = result.tier_results[0]
assert tier_result.tier == SearchTier.CATEGORIES
assert tier_result.total_found == 2 # Should be 2 results
assert tier_result.searched is True
assert tier_result.min_score >= 0.91 # Should be 0.91 (minimum of both)
assert tier_result.max_score <= 1.0
assert 0.0 < tier_result.avg_score < 1.0
assert tier_result.latency_ms >= 0
@pytest.mark.asyncio
async def test_search_progressive_latency_tracking(self, engine, mock_db):
"""Test that search latency is tracked correctly."""
mock_db.search_reflections.return_value = []
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query="test query",
max_tiers=2,
enable_early_stop=False,
)
assert result.total_latency_ms >= 0
# Should track latency for each tier searched
for tier_result in result.tier_results:
assert tier_result.latency_ms >= 0
class TestProgressiveSearchSuccessCriteria:
"""Test Phase 3 success criteria for progressive search."""
@pytest.fixture
def engine(self):
"""Create a ProgressiveSearchEngine instance."""
return ProgressiveSearchEngine()
@pytest.fixture
def mock_db(self):
"""Create a mock database adapter."""
db = AsyncMock()
# Setup realistic tier response patterns with high-quality first tier
# to trigger early stopping
async def mock_search_reflections(*args, **kwargs):
# CATEGORIES tier returns high-quality results (triggers early stop)
return [
{"content": f"reflection{i}", "score": 0.95 - (i * 0.01)}
for i in range(5)
]
async def mock_search_conversations(*args, **kwargs):
return []
db.search_reflections = mock_search_reflections
db.search_conversations = mock_search_conversations
return db
@pytest.mark.asyncio
async def test_average_tiers_searched_less_than_2_5(self, engine, mock_db):
"""Test success criterion: average tiers searched <2.5 for typical queries.
This verifies that early stopping is effective and most queries find
sufficient results in the first 1-2 tiers.
"""
queries = [
"Python async patterns",
"FastAPI middleware",
"pytest fixtures",
"database optimization",
]
total_tiers = 0
for query in queries:
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query=query,
max_tiers=4,
enable_early_stop=True,
)
total_tiers += len(result.tiers_searched)
avg_tiers = total_tiers / len(queries)
print(f"\nAverage Tiers Searched Results:")
print(f" Total queries: {len(queries)}")
print(f" Total tier searches: {total_tiers}")
print(f" Average tiers per query: {avg_tiers:.2f}")
# Success criterion: <2.5 average tiers
        assert avg_tiers < 2.5, f"Average tiers {avg_tiers:.2f} exceeds 2.5 threshold"
@pytest.mark.asyncio
async def test_search_time_reduction(self, engine, mock_db):
"""Test success criterion: progressive search searches fewer tiers.
This test verifies that early stopping reduces the number of tiers searched,
which is the primary performance optimization of progressive search.
Actual time reduction may vary due to overhead of sufficiency checking.
"""
query = "Python async patterns"
        # Set up mocks - the early-stopping run gets high-quality tier-1 results
async def mock_search_reflections_early(*args, **kwargs):
            # High-quality results (avg score >= 0.85 across 3+ results) trigger early stopping
# Include project field to pass filtering
return [
{"content": f"result{i}", "score": 0.96 - (i * 0.01), "project": None}
for i in range(5)
]
async def mock_search_conversations_early(*args, **kwargs):
return []
        # The full-search run gets moderate results and has early stopping disabled,
        # so it searches all 4 tiers
async def mock_search_reflections_full(*args, **kwargs):
# Moderate quality results - no perfect matches, avg < 0.85
# Include project field to pass filtering
return [
{"content": f"result{i}", "score": 0.82 - (i * 0.02), "project": None}
for i in range(5)
]
async def mock_search_conversations_full(*args, **kwargs):
return []
# Measure progressive search time (with early stopping)
early_db = AsyncMock()
early_db.search_reflections = mock_search_reflections_early
early_db.search_conversations = mock_search_conversations_early
with patch.object(engine, "_db", early_db):
progressive_start = time.perf_counter()
progressive_result = await engine.search_progressive(
query=query,
max_tiers=4,
enable_early_stop=True,
)
progressive_time = time.perf_counter() - progressive_start
# Measure full search time (no early stopping)
full_db = AsyncMock()
full_db.search_reflections = mock_search_reflections_full
full_db.search_conversations = mock_search_conversations_full
with patch.object(engine, "_db", full_db):
full_start = time.perf_counter()
full_result = await engine.search_progressive(
query=query,
max_tiers=4,
enable_early_stop=False, # Disable early stopping
)
full_time = time.perf_counter() - full_start
# Calculate improvement
time_reduction = (full_time - progressive_time) / full_time if full_time > 0 else 0
tier_reduction = len(full_result.tiers_searched) - len(progressive_result.tiers_searched)
print(f"\nSearch Performance Results:")
print(f" Progressive search: {progressive_time*1000:.2f}ms ({len(progressive_result.tiers_searched)} tiers)")
print(f" Full search: {full_time*1000:.2f}ms ({len(full_result.tiers_searched)} tiers)")
print(f" Time reduction: {time_reduction:.1%}")
print(f" Tier reduction: {tier_reduction} fewer tiers searched")
# Primary success criterion: early stopping reduces tiers searched
assert len(progressive_result.tiers_searched) < len(full_result.tiers_searched), \
"Progressive search should search fewer tiers than full search when early stopping is effective"
# Secondary check: if tiers were reduced, verify it was due to early stopping
if tier_reduction > 0:
assert progressive_result.early_stop, "Early stopping flag should be set when fewer tiers searched"
@pytest.mark.asyncio
async def test_result_quality_maintained(self, engine, mock_db):
"""Test that result quality is maintained or improved.
Verifies that progressive search doesn't sacrifice result quality
for performance gains.
"""
query = "high-quality insights about async"
# Setup high-quality results
async def mock_search_reflections_high_quality(*args, **kwargs):
return [
{"content": "result1", "score": 0.95},
{"content": "result2", "score": 0.93},
{"content": "result3", "score": 0.91},
]
mock_db.search_reflections = mock_search_reflections_high_quality
mock_db.search_conversations = AsyncMock(return_value=[])
with patch.object(engine, "_db", mock_db):
result = await engine.search_progressive(
query=query,
max_tiers=4,
enable_early_stop=True,
)
# Should find high-quality results
assert result.total_results >= 3
assert len(result.tiers_searched) >= 1
# Check result quality
all_scores = []
for tier_result in result.tier_results:
for r in tier_result.results:
score = r.get("score", r.get("similarity", 0.0))
if score:
all_scores.append(score)
if all_scores:
avg_score = sum(all_scores) / len(all_scores)
print(f"\nResult Quality:")
print(f" Total results: {result.total_results}")
print(f" Average score: {avg_score:.2f}")
# Results should maintain good quality
assert avg_score >= 0.7, f"Average score {avg_score:.2f} is below quality threshold"
@pytest.mark.asyncio
async def test_graceful_degradation_to_single_tier(self, engine, mock_db):
"""Test that system gracefully degrades when tiers fail.
Verifies that if later tiers fail, the system still returns results
from earlier tiers.
"""
query = "test query"
# Setup only first tier to succeed
async def mock_search_reflections(*args, **kwargs):
return [{"content": "result", "score": 0.85}]
async def mock_search_conversations_fail(*args, **kwargs):
raise RuntimeError("Database connection failed")
mock_db.search_reflections = mock_search_reflections
mock_db.search_conversations = mock_search_conversations_fail
with patch.object(engine, "_db", mock_db):
# Should not raise exception
result = await engine.search_progressive(
query=query,
max_tiers=2,
enable_early_stop=False,
)
# Should return results from successful tier
assert result.total_results >= 1
assert len(result.tiers_searched) >= 1