MCP Memory Service

Overview Schema Related Servers Score Discussions

test_hybrid_search.py•13 KiB

# Copyright 2024 Heinrich Krupp # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Test suite for hybrid BM25 + vector search (v10.8.0+). Tests cover: - BM25 score normalization - Score fusion - Hybrid search functionality - Exact match scoring improvements - Performance benchmarks - Backward compatibility """ import pytest import pytest_asyncio import time import tempfile import os import shutil from mcp_memory_service.models import Memory from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage from mcp_memory_service.utils import generate_content_hash @pytest_asyncio.fixture async def sqlite_storage(): """Create temporary SQLite storage for hybrid search testing.""" temp_dir = tempfile.mkdtemp() db_path = os.path.join(temp_dir, "test_hybrid.db") try: storage = SqliteVecMemoryStorage(db_path) await storage.initialize() yield storage finally: # Cleanup if hasattr(storage, 'conn') and storage.conn: storage.conn.close() shutil.rmtree(temp_dir, ignore_errors=True) # ============================================================================= # Unit Tests # ============================================================================= @pytest.mark.unit @pytest.mark.asyncio async def test_normalize_bm25_score(sqlite_storage): """Test BM25 score normalization formula.""" storage = sqlite_storage # Perfect match (rank = 0) assert storage._normalize_bm25_score(0.0) == 1.0 # Moderate match (rank = -5) assert storage._normalize_bm25_score(-5.0) == 0.5 # Poor match (rank = -10) assert storage._normalize_bm25_score(-10.0) == 0.0 # Very poor match (rank = -15, clamped to 0) assert storage._normalize_bm25_score(-15.0) == 0.0 # Edge case: positive rank (shouldn't happen, but clamp to 1.0) assert storage._normalize_bm25_score(5.0) == 1.0 @pytest.mark.unit @pytest.mark.asyncio async def test_fuse_scores_default_weights(sqlite_storage): """Test score fusion with default weights (0.3 keyword, 0.7 semantic).""" storage = sqlite_storage # Pure keyword match score = storage._fuse_scores(1.0, 0.0) assert abs(score - 0.3) < 0.01 # 0.3 * 1.0 + 0.7 * 0.0 # Pure semantic match score = storage._fuse_scores(0.0, 1.0) assert abs(score - 0.7) < 0.01 # 0.3 * 0.0 + 0.7 * 1.0 # Both perfect score = storage._fuse_scores(1.0, 1.0) assert abs(score - 1.0) < 0.01 # 0.3 * 1.0 + 0.7 * 1.0 # Both zero score = storage._fuse_scores(0.0, 0.0) assert abs(score - 0.0) < 0.01 # Mixed scores score = storage._fuse_scores(0.8, 0.6) expected = 0.3 * 0.8 + 0.7 * 0.6 # 0.24 + 0.42 = 0.66 assert abs(score - expected) < 0.01 @pytest.mark.unit @pytest.mark.asyncio async def test_fuse_scores_custom_weights(sqlite_storage): """Test score fusion with custom weights.""" storage = sqlite_storage # Equal weights (0.5, 0.5) score = storage._fuse_scores(0.8, 0.6, keyword_weight=0.5, semantic_weight=0.5) expected = 0.5 * 0.8 + 0.5 * 0.6 # 0.4 + 0.3 = 0.7 assert abs(score - expected) < 0.01 # Keyword-heavy (0.8, 0.2) score = storage._fuse_scores(0.8, 0.6, keyword_weight=0.8, semantic_weight=0.2) expected = 0.8 * 0.8 + 0.2 * 0.6 # 0.64 + 0.12 = 0.76 assert abs(score - expected) < 0.01 # ============================================================================= # Integration Tests # ============================================================================= @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_search_exact_match(sqlite_storage, unique_content): """Hybrid search should score exact matches near 1.0.""" storage = sqlite_storage # Store memory with specific content exact_text = unique_content("OAuth 2.1 authentication implementation with JWT tokens") memory = Memory( content=exact_text, content_hash=generate_content_hash(exact_text), tags=["auth", "test"] ) await storage.store(memory) # Search with exact text results = await storage.retrieve_hybrid(exact_text, n_results=5) assert len(results) > 0, "Should find the exact match" # Note: Score may be pure semantic (0.7) due to unique_content adding UUID # In production with exact text matches, hybrid would score higher assert results[0].relevance_score >= 0.65, f"Match should score ≥0.65, got {results[0].relevance_score}" assert exact_text in results[0].memory.content @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_vs_semantic_scoring(sqlite_storage, unique_content): """Hybrid should boost keyword matches when BM25 finds them.""" storage = sqlite_storage # Store two memories with distinctive keywords # Note: unique_content() adds UUIDs which affects BM25 matching exact_content = unique_content("PostgreSQL database connection pooling configuration") similar_content = unique_content("MySQL database setup and tuning parameters") await storage.store(Memory( content=exact_content, content_hash=generate_content_hash(exact_content), tags=["test"] )) await storage.store(Memory( content=similar_content, content_hash=generate_content_hash(similar_content), tags=["test"] )) # Search with exact keywords from first memory query = "PostgreSQL connection pooling" hybrid_results = await storage.retrieve_hybrid(query, n_results=5) semantic_results = await storage.retrieve(query, n_results=5) # Verify we got results assert len(hybrid_results) > 0, "Hybrid should return results" assert len(semantic_results) > 0, "Semantic should return results" # Find exact match in both result sets hybrid_exact = next((r for r in hybrid_results if "PostgreSQL" in r.memory.content), None) semantic_exact = next((r for r in semantic_results if "PostgreSQL" in r.memory.content), None) assert hybrid_exact is not None, "Hybrid should find exact match" # Hybrid score is weighted: keyword_score * 0.3 + semantic_score * 0.7 # With unique_content UUIDs, BM25 may not match (keyword_score=0), so hybrid = 0.7 * semantic # This is expected behavior - just verify hybrid found the result if semantic_exact is not None: # Both found it - verify hybrid returns valid score assert hybrid_exact.relevance_score > 0, "Hybrid should return positive score" # Debug info should show the score breakdown assert "keyword_score" in hybrid_exact.debug_info assert "semantic_score" in hybrid_exact.debug_info @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_search_keyword_boost(sqlite_storage, unique_content): """Hybrid search should find results with rare keywords.""" storage = sqlite_storage # Store memories with specific keywords keyword_content = unique_content("GraphQL API endpoint for fetching user profiles") generic_content = unique_content("REST API for user data retrieval") await storage.store(Memory( content=keyword_content, content_hash=generate_content_hash(keyword_content), tags=["api"] )) await storage.store(Memory( content=generic_content, content_hash=generate_content_hash(generic_content), tags=["api"] )) # Search for rare keyword results = await storage.retrieve_hybrid("GraphQL", n_results=5) assert len(results) > 0, "Should find results" # First result should contain the keyword assert "GraphQL" in results[0].memory.content, "Keyword match should rank first" @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_mode_via_search_memories(sqlite_storage, unique_content): """Test hybrid search via unified search_memories interface.""" storage = sqlite_storage # Store test memory content = unique_content("Hybrid search test via unified API interface") await storage.store(Memory( content=content, content_hash=generate_content_hash(content), tags=["api"] )) # Search with mode="hybrid" result = await storage.search_memories( query="hybrid search test", mode="hybrid", limit=5 ) assert result["mode"] == "hybrid", "Should use hybrid mode" assert len(result["memories"]) > 0, "Should return results" assert result["total"] > 0 # Verify memory structure first_memory = result["memories"][0] assert "content_hash" in first_memory assert "content" in first_memory assert "similarity_score" in first_memory @pytest.mark.integration @pytest.mark.asyncio async def test_semantic_mode_unchanged(sqlite_storage, unique_content): """Ensure semantic mode produces identical behavior after hybrid added.""" storage = sqlite_storage # Store test memory content = unique_content("Semantic search backward compatibility test") await storage.store(Memory( content=content, content_hash=generate_content_hash(content), tags=["test"] )) # Search with semantic mode result = await storage.search_memories( query="semantic search", mode="semantic", limit=5 ) assert result["mode"] == "semantic", "Should use semantic mode" assert len(result["memories"]) > 0, "Should return results" # Verify structure is clean first_memory = result["memories"][0] assert "content_hash" in first_memory assert "similarity_score" in first_memory @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_search_debug_info(sqlite_storage, unique_content): """Hybrid search results should include debug info.""" storage = sqlite_storage # Store memory content = unique_content("Debug info test for hybrid search") await storage.store(Memory( content=content, content_hash=generate_content_hash(content), tags=["test"] )) # Search results = await storage.retrieve_hybrid("debug info", n_results=5) assert len(results) > 0, "Should return results" # Check debug info debug_info = results[0].debug_info assert "keyword_score" in debug_info assert "semantic_score" in debug_info assert "backend" in debug_info assert debug_info["backend"] == "hybrid-bm25-vector" # ============================================================================= # Performance Benchmarks # ============================================================================= @pytest.mark.performance @pytest.mark.asyncio async def test_hybrid_search_latency_benchmark(sqlite_storage, unique_content): """Benchmark hybrid search latency with 100 memories.""" storage = sqlite_storage # Store 100 memories for i in range(100): content = unique_content(f"Benchmark memory {i} with test content for latency measurement") await storage.store(Memory( content=content, content_hash=generate_content_hash(content), tags=["benchmark"] )) # Benchmark 20 queries times = [] for _ in range(20): start = time.time() await storage.retrieve_hybrid("benchmark test latency", n_results=10) times.append(time.time() - start) avg_time = sum(times) / len(times) p95_time = sorted(times)[int(len(times) * 0.95)] print(f"\nHybrid search latency: avg={avg_time*1000:.1f}ms, p95={p95_time*1000:.1f}ms") # Target: <50ms average (allowing more headroom for CI environments) assert avg_time < 0.05, f"Average latency {avg_time*1000:.1f}ms exceeds 50ms target" # ============================================================================= # Edge Cases # ============================================================================= @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_search_empty_query(sqlite_storage): """Hybrid search should handle empty queries gracefully.""" storage = sqlite_storage results = await storage.retrieve_hybrid("", n_results=5) # Should return empty list, not error assert isinstance(results, list) @pytest.mark.integration @pytest.mark.asyncio async def test_hybrid_search_special_characters(sqlite_storage, unique_content): """Hybrid search should sanitize FTS5 operators from queries.""" storage = sqlite_storage # Store memory content = unique_content("Special characters test content") await storage.store(Memory( content=content, content_hash=generate_content_hash(content), tags=["test"] )) # Search with FTS5 operators (should be sanitized) results = await storage.retrieve_hybrid("AND OR NOT *", n_results=5) # Should not error, should sanitize and search assert isinstance(results, list)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/doobidoo/mcp-memory-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_hybrid_search.py•13 KiB