test_quality_integration.py
"""Integration tests for quality-based consolidation and search.""" import pytest import asyncio import time from datetime import datetime, timedelta from typing import List from mcp_memory_service.consolidation.forgetting import ControlledForgettingEngine from mcp_memory_service.consolidation.decay import ExponentialDecayCalculator, RelevanceScore from mcp_memory_service.consolidation.base import ConsolidationConfig from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage from mcp_memory_service.models.memory import Memory from mcp_memory_service.config import ( MCP_QUALITY_RETENTION_HIGH, MCP_QUALITY_RETENTION_MEDIUM, MCP_QUALITY_RETENTION_LOW_MIN, MCP_QUALITY_RETENTION_LOW_MAX, ) @pytest.fixture async def storage(): """Create an in-memory SQLite storage instance.""" storage = SqliteVecMemoryStorage(':memory:') await storage.initialize() yield storage # Cleanup is automatic for in-memory database def create_memory(content: str, quality_score: float = 0.5, days_old: int = 0, tags: List[str] = None) -> Memory: """Helper to create a memory with specified quality and age.""" created_at = time.time() - (days_old * 86400) memory = Memory( content=content, content_hash=f"hash_{hash(content)}", tags=tags or [], metadata={'quality_score': quality_score}, created_at=created_at, created_at_iso=datetime.fromtimestamp(created_at).isoformat() + 'Z' ) return memory @pytest.mark.asyncio async def test_quality_based_forgetting(): """Test that high-quality memories are preserved longer.""" config = ConsolidationConfig( relevance_threshold=0.1, access_threshold_days=90, archive_location='/tmp/test_archive' ) forgetting_engine = ControlledForgettingEngine(config) # Create memories with different quality scores and ages high_quality_old = create_memory("High quality old memory", quality_score=0.9, days_old=200) medium_quality_old = create_memory("Medium quality old memory", quality_score=0.6, days_old=200) low_quality_old = create_memory("Low quality old memory", quality_score=0.2, days_old=60) memories = [high_quality_old, medium_quality_old, low_quality_old] # Create relevance scores (all with low total_score to trigger forgetting check) score_lookup = { high_quality_old.content_hash: RelevanceScore( memory_hash=high_quality_old.content_hash, total_score=0.5, base_importance=1.0, decay_factor=0.5, connection_boost=1.0, access_boost=1.0, metadata={} ), medium_quality_old.content_hash: RelevanceScore( memory_hash=medium_quality_old.content_hash, total_score=0.5, base_importance=1.0, decay_factor=0.5, connection_boost=1.0, access_boost=1.0, metadata={} ), low_quality_old.content_hash: RelevanceScore( memory_hash=low_quality_old.content_hash, total_score=0.5, base_importance=1.0, decay_factor=0.5, connection_boost=1.0, access_boost=1.0, metadata={} ), } # Set access patterns (simulate old access) access_patterns = { high_quality_old.content_hash: datetime.now() - timedelta(days=200), medium_quality_old.content_hash: datetime.now() - timedelta(days=200), low_quality_old.content_hash: datetime.now() - timedelta(days=60), } # Identify forgetting candidates candidates = await forgetting_engine._identify_forgetting_candidates( memories, score_lookup, access_patterns, 'monthly' ) # Verify quality-based retention policy candidate_hashes = {c.memory.content_hash for c in candidates} # High quality (0.9): 200 days < 365 day threshold -> should NOT be candidate assert high_quality_old.content_hash not in candidate_hashes, \ "High quality memory should be preserved longer" # Medium quality (0.6): 200 days 

@pytest.mark.asyncio
async def test_quality_weighted_decay():
    """Test that high-quality memories have slower decay."""
    config = ConsolidationConfig(
        retention_periods={'standard': 30}
    )
    decay_calculator = ExponentialDecayCalculator(config)

    # Create memories with different quality scores
    high_quality = create_memory("High quality memory", quality_score=0.9, days_old=10)
    low_quality = create_memory("Low quality memory", quality_score=0.2, days_old=10)

    # Calculate decay scores
    high_score = await decay_calculator._calculate_memory_relevance(
        high_quality,
        datetime.now(),
        {},  # no connections
        {}   # no access patterns
    )
    low_score = await decay_calculator._calculate_memory_relevance(
        low_quality,
        datetime.now(),
        {},  # no connections
        {}   # no access patterns
    )

    # High quality should have a higher total score due to the quality multiplier
    assert high_score.total_score > low_score.total_score, \
        "High quality memory should have slower decay (higher score)"

    # Verify the quality multiplier is applied
    assert 'quality_multiplier' in high_score.metadata
    assert 'quality_multiplier' in low_score.metadata
    assert high_score.metadata['quality_multiplier'] > low_score.metadata['quality_multiplier']


@pytest.mark.asyncio
async def test_quality_boosted_search(storage):
    """Test quality-based reranking improves results."""
    # Store memories with varying quality and semantic similarity

    # Memory 1: High semantic relevance, low quality
    mem1 = create_memory(
        "Python async patterns for concurrent programming with asyncio",
        quality_score=0.3,
        tags=['python', 'async']
    )
    # Memory 2: Medium semantic relevance, high quality
    mem2 = create_memory(
        "Advanced Python concurrency techniques",
        quality_score=0.9,
        tags=['python', 'advanced']
    )
    # Memory 3: Low semantic relevance, medium quality
    mem3 = create_memory(
        "JavaScript promises and callbacks",
        quality_score=0.5,
        tags=['javascript']
    )

    await storage.store(mem1)
    await storage.store(mem2)
    await storage.store(mem3)

    # Search with quality boost disabled (semantic only)
    semantic_results = await storage.retrieve_with_quality_boost(
        query="Python async programming",
        n_results=3,
        quality_boost=False
    )

    # Search with quality boost enabled (70% semantic + 30% quality)
    boosted_results = await storage.retrieve_with_quality_boost(
        query="Python async programming",
        n_results=3,
        quality_boost=True,
        quality_weight=0.3
    )

    # Verify reranking occurred
    assert len(boosted_results) > 0
    for result in boosted_results:
        assert 'reranked' in result.debug_info
        assert result.debug_info['reranked'] is True
        assert 'original_semantic_score' in result.debug_info
        assert 'quality_score' in result.debug_info

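
# A minimal sketch of the reranking blend the comment above describes
# ("70% semantic + 30% quality"): the final score is assumed to be a linear
# combination of semantic similarity and the stored quality_score, weighted by
# quality_weight. This illustrates the scoring formula the tests exercise; it is
# not the actual retrieve_with_quality_boost implementation.
def _assumed_boosted_score(semantic_score: float, quality_score: float,
                           quality_weight: float = 0.3) -> float:
    """Illustrative only: blend semantic relevance with memory quality."""
    # quality_weight=0.0 degenerates to semantic-only ranking;
    # quality_weight=1.0 ranks purely by quality (see the edge-case test below).
    return (1.0 - quality_weight) * semantic_score + quality_weight * quality_score
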

@pytest.mark.asyncio
async def test_quality_boost_performance(storage):
    """Test that quality boost doesn't add excessive latency."""
    # Store 100 memories with random quality scores
    import random
    for i in range(100):
        memory = create_memory(
            f"Test memory {i} with some content about Python and async patterns",
            quality_score=random.random(),
            tags=['test', f'batch-{i//10}']
        )
        await storage.store(memory)

    # Measure latency
    start = time.time()
    results = await storage.retrieve_with_quality_boost(
        query="Python async",
        n_results=10,
        quality_boost=True,
        quality_weight=0.3
    )
    latency = time.time() - start

    # Should be fast (<200ms target for 100 memories)
    assert latency < 0.2, f"Quality boost too slow: {latency:.3f}s"
    assert len(results) <= 10


@pytest.mark.asyncio
async def test_quality_boost_weight_validation(storage):
    """Test that invalid quality weights are rejected."""
    memory = create_memory("Test memory", quality_score=0.5)
    await storage.store(memory)

    # Test invalid weight (too high)
    with pytest.raises(ValueError, match="quality_weight must be 0.0-1.0"):
        await storage.retrieve_with_quality_boost(
            query="test",
            n_results=5,
            quality_boost=True,
            quality_weight=1.5
        )

    # Test invalid weight (negative)
    with pytest.raises(ValueError, match="quality_weight must be 0.0-1.0"):
        await storage.retrieve_with_quality_boost(
            query="test",
            n_results=5,
            quality_boost=True,
            quality_weight=-0.1
        )


@pytest.mark.asyncio
async def test_quality_boost_edge_cases(storage):
    """Test quality boost with edge cases."""
    # Test with no memories
    results = await storage.retrieve_with_quality_boost(
        query="nonexistent",
        n_results=10,
        quality_boost=True
    )
    assert len(results) == 0

    # Test with a single memory
    memory = create_memory("Single memory", quality_score=0.7)
    await storage.store(memory)

    results = await storage.retrieve_with_quality_boost(
        query="single",
        n_results=10,
        quality_boost=True
    )
    assert len(results) == 1

    # Test with quality_weight=0.0 (semantic only)
    results = await storage.retrieve_with_quality_boost(
        query="single",
        n_results=10,
        quality_boost=True,
        quality_weight=0.0
    )
    assert len(results) == 1

    # Test with quality_weight=1.0 (quality only)
    results = await storage.retrieve_with_quality_boost(
        query="single",
        n_results=10,
        quality_boost=True,
        quality_weight=1.0
    )
    assert len(results) == 1


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
