"""Unit tests for the creative association engine."""
import pytest
from datetime import datetime, timedelta
from mcp_memory_service.consolidation.associations import (
CreativeAssociationEngine,
AssociationAnalysis
)
from mcp_memory_service.consolidation.base import MemoryAssociation
from mcp_memory_service.models.memory import Memory
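
# These tests pin down the engine's public surface only through the assertions
# below. Rough usage sketch, using names that appear in this module (not taken
# from the engine's documentation):
#
#   engine = CreativeAssociationEngine(consolidation_config)
#   associations = await engine.process(memories, existing_associations=set())
#   # -> list of MemoryAssociation with source_memory_hashes, similarity_score,
#   #    connection_type, discovery_method, discovery_date, and a metadata dict
#   #    carrying shared_concepts, confidence_score and analysis_version.
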
@pytest.mark.unit
class TestCreativeAssociationEngine:
"""Test the creative association discovery system."""
@pytest.fixture
def association_engine(self, consolidation_config):
return CreativeAssociationEngine(consolidation_config)
@pytest.mark.asyncio
async def test_basic_association_discovery(self, association_engine, sample_memories):
"""Test basic association discovery functionality."""
# Use memories that should have some associations
memories = sample_memories[:5]
associations = await association_engine.process(memories)
        # The engine returns a list of MemoryAssociation objects (it may be empty)
assert isinstance(associations, list)
assert all(isinstance(assoc, MemoryAssociation) for assoc in associations)
# Check association properties
for assoc in associations:
assert len(assoc.source_memory_hashes) == 2
assert 0.3 <= assoc.similarity_score <= 0.7 # Sweet spot range
assert assoc.discovery_method == "creative_association"
assert isinstance(assoc.discovery_date, datetime)
@pytest.mark.asyncio
async def test_similarity_sweet_spot_filtering(self, association_engine):
"""Test that only memories in similarity sweet spot are associated."""
now = datetime.now()
# Create memories with known similarity relationships
base_memory = Memory(
content="Python programming concepts",
content_hash="base",
tags=["python", "programming"],
embedding=[0.5, 0.5, 0.5, 0.5, 0.5] * 64,
created_at=now.timestamp()
)
# Very similar memory (should be filtered out - too similar)
too_similar = Memory(
content="Python programming concepts and techniques",
content_hash="similar",
tags=["python", "programming"],
embedding=[0.51, 0.51, 0.51, 0.51, 0.51] * 64, # Very similar
created_at=now.timestamp()
)
# Moderately similar memory (should be included)
good_similarity = Memory(
content="JavaScript development practices",
content_hash="moderate",
tags=["javascript", "development"],
embedding=[0.6, 0.4, 0.6, 0.4, 0.6] * 64, # Moderate similarity
created_at=now.timestamp()
)
# Very different memory (should be filtered out - too different)
too_different = Memory(
content="Weather forecast for tomorrow",
content_hash="different",
tags=["weather", "forecast"],
embedding=[0.1, 0.9, 0.1, 0.9, 0.1] * 64, # Very different
created_at=now.timestamp()
)
memories = [base_memory, too_similar, good_similarity, too_different]
associations = await association_engine.process(memories)
# Should only find association between base and good_similarity
if associations: # May be empty due to confidence threshold
for assoc in associations:
assert assoc.similarity_score >= 0.3
assert assoc.similarity_score <= 0.7
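
    # Note: the test above tolerates an empty result because the engine layers a
    # confidence threshold on top of the similarity gate. A minimal sketch of the
    # gate itself, assuming a generic pairwise score `sim(m1, m2)` (hypothetical
    # helper, not the engine's API):
    #
    #   candidates = [(m1, m2) for m1, m2 in pairs if 0.3 <= sim(m1, m2) <= 0.7]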
@pytest.mark.asyncio
async def test_existing_associations_filtering(self, association_engine, sample_memories):
"""Test that existing associations are not duplicated."""
memories = sample_memories[:4]
# Create set of existing associations
existing = {
(memories[0].content_hash, memories[1].content_hash),
(memories[1].content_hash, memories[0].content_hash) # Both directions
}
associations = await association_engine.process(
memories,
existing_associations=existing
)
        # Should not include the existing association (compare pairs order-insensitively)
        existing_pairs = {tuple(sorted(pair)) for pair in existing}
        for assoc in associations:
            pair = tuple(sorted(assoc.source_memory_hashes))
            assert pair not in existing_pairs
@pytest.mark.asyncio
async def test_association_analysis(self, association_engine):
"""Test the association analysis functionality."""
# Create memories with known relationships
mem1 = Memory(
content="Python list comprehensions provide concise syntax",
content_hash="mem1",
tags=["python", "syntax"],
embedding=[0.4, 0.5, 0.6, 0.5, 0.4] * 64,
created_at=datetime.now().timestamp()
)
mem2 = Memory(
content="JavaScript arrow functions offer clean syntax",
content_hash="mem2",
tags=["javascript", "syntax"],
embedding=[0.5, 0.4, 0.5, 0.6, 0.5] * 64,
created_at=datetime.now().timestamp()
)
# Calculate similarity
similarity = await association_engine._calculate_semantic_similarity(mem1, mem2)
# Analyze the association
analysis = await association_engine._analyze_association(mem1, mem2, similarity)
assert isinstance(analysis, AssociationAnalysis)
assert analysis.memory1_hash == "mem1"
assert analysis.memory2_hash == "mem2"
assert analysis.similarity_score == similarity
assert "shared_tags" in analysis.connection_reasons # Both have "syntax" tag
assert "syntax" in analysis.tag_overlap
assert analysis.confidence_score > 0
@pytest.mark.asyncio
async def test_temporal_relationship_analysis(self, association_engine):
"""Test temporal relationship detection."""
now = datetime.now()
        # Two memories created on the same day, six hours apart
mem1 = Memory(
content="Morning meeting notes",
content_hash="morning",
tags=["meeting"],
embedding=[0.4, 0.5, 0.6] * 107, # ~320 dim
created_at=now.timestamp()
)
mem2 = Memory(
content="Afternoon project update",
content_hash="afternoon",
tags=["project"],
embedding=[0.5, 0.4, 0.5] * 107,
created_at=(now + timedelta(hours=6)).timestamp()
)
analysis = await association_engine._analyze_association(
mem1, mem2, 0.5
)
assert analysis.temporal_relationship == "same_day"
assert "temporal_proximity" in analysis.connection_reasons
@pytest.mark.asyncio
async def test_concept_extraction(self, association_engine):
"""Test concept extraction from memory content."""
content = 'Check out this URL: https://example.com and email me at test@example.com. The API returns {"status": "success"} with CamelCase variables.'
concepts = association_engine._extract_concepts(content)
# Should extract various types of concepts
assert "https://example.com" in concepts or any("example.com" in c for c in concepts)
assert "test@example.com" in concepts
assert "CamelCase" in concepts or any("camel" in c.lower() for c in concepts)
assert len(concepts) > 0
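
    # The concept assertions above only presuppose URL, e-mail and CamelCase
    # detection. A minimal regex sketch that would satisfy them (illustrative,
    # not the engine's actual _extract_concepts implementation):
    #
    #   import re
    #   URL = re.compile(r"https?://\S+")
    #   EMAIL = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+")
    #   CAMEL = re.compile(r"\b[A-Z][a-z]+(?:[A-Z][a-z]+)+\b")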
@pytest.mark.asyncio
async def test_structural_similarity_detection(self, association_engine):
"""Test detection of similar structural patterns."""
content1 = """
# Header 1
- Item 1
- Item 2
```code block```
"""
content2 = """
# Header 2
- Different item 1
- Different item 2
```another code block```
"""
has_similar = association_engine._has_similar_structure(content1, content2)
assert has_similar is True
# Test different structure
content3 = "Just plain text without any special formatting."
has_different = association_engine._has_similar_structure(content1, content3)
assert has_different is False
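
    # Structural similarity here evidently keys on shared formatting markers
    # (markdown headers, bullet items, fenced code blocks); the exact rule is
    # inferred from these fixtures rather than from the engine's documentation.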
@pytest.mark.asyncio
async def test_complementary_content_detection(self, association_engine):
"""Test detection of complementary content patterns."""
# Question and answer pattern
question_content = "How do you implement binary search? What is the time complexity?"
answer_content = "Binary search implementation uses divide and conquer. Time complexity is O(log n)."
is_complementary = association_engine._has_complementary_content(
question_content, answer_content
)
assert is_complementary is True
# Problem and solution pattern
problem_content = "The database query is failing with timeout error"
solution_content = "Fixed the timeout by adding proper indexing to resolve the issue"
is_complementary_ps = association_engine._has_complementary_content(
problem_content, solution_content
)
assert is_complementary_ps is True
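
    # The fixtures above imply a keyword-style heuristic: question markers
    # ("how", "what", "?") paired with declarative answers, and problem terms
    # ("failing", "error") paired with resolution terms ("fixed", "resolve").
    # That reading is inferred from the test data, not from the engine itself.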
@pytest.mark.asyncio
async def test_confidence_score_calculation(self, association_engine):
"""Test confidence score calculation."""
# High confidence scenario
high_confidence = association_engine._calculate_confidence_score(
similarity=0.6, # Good similarity
num_reasons=3, # Multiple connection reasons
num_shared_concepts=5, # Many shared concepts
num_shared_tags=2 # Shared tags
)
# Low confidence scenario
low_confidence = association_engine._calculate_confidence_score(
similarity=0.35, # Lower similarity
num_reasons=1, # Few reasons
num_shared_concepts=1, # Few concepts
num_shared_tags=0 # No shared tags
)
assert high_confidence > low_confidence
assert 0 <= high_confidence <= 1
assert 0 <= low_confidence <= 1
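
    # The assertions above only constrain monotonicity and the [0, 1] range; no
    # particular formula is assumed. For illustration only, a weighted sum such as
    #   0.4 * similarity + 0.2 * min(num_reasons / 3, 1)
    #     + 0.2 * min(num_shared_concepts / 5, 1) + 0.2 * min(num_shared_tags / 2, 1)
    # would satisfy them, but this is not the engine's actual scoring code.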
@pytest.mark.asyncio
async def test_filter_high_confidence_associations(self, association_engine, sample_memories):
"""Test filtering associations by confidence score."""
memories = sample_memories[:4]
associations = await association_engine.process(memories)
if associations: # Only test if associations were found
high_confidence = await association_engine.filter_high_confidence_associations(
associations, min_confidence=0.7
)
# All returned associations should meet confidence threshold
for assoc in high_confidence:
assert assoc.metadata.get('confidence_score', 0) >= 0.7
@pytest.mark.asyncio
async def test_group_associations_by_type(self, association_engine, sample_memories):
"""Test grouping associations by connection type."""
memories = sample_memories[:5]
associations = await association_engine.process(memories)
if associations: # Only test if associations were found
grouped = await association_engine.group_associations_by_type(associations)
assert isinstance(grouped, dict)
# Each group should contain associations of the same type
for connection_type, group in grouped.items():
assert all(assoc.connection_type == connection_type for assoc in group)
@pytest.mark.asyncio
async def test_text_similarity_fallback(self, association_engine):
"""Test text similarity fallback when embeddings are unavailable."""
mem1 = Memory(
content="python programming language concepts",
content_hash="text1",
tags=["python"],
embedding=None, # No embedding
created_at=datetime.now().timestamp()
)
mem2 = Memory(
content="programming language python concepts",
content_hash="text2",
tags=["python"],
embedding=None, # No embedding
created_at=datetime.now().timestamp()
)
similarity = await association_engine._calculate_semantic_similarity(mem1, mem2)
# Should use text-based similarity
assert 0 <= similarity <= 1
assert similarity > 0 # Should find some similarity due to word overlap
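
    # A word-overlap (Jaccard) measure is one fallback consistent with these
    # assertions; the two contents above share an identical word set, so any such
    # metric scores well above zero. Illustrative only, not the engine's code:
    #
    #   w1, w2 = set(mem1.content.split()), set(mem2.content.split())
    #   jaccard = len(w1 & w2) / len(w1 | w2)   # == 1.0 for these two memories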
@pytest.mark.asyncio
async def test_max_pairs_limiting(self, association_engine, large_memory_set):
"""Test that pair sampling limits combinatorial explosion."""
# Use many memories to test pair limiting
memories = large_memory_set[:20] # 20 memories = 190 possible pairs
        # Temporarily lower max_pairs_per_run so the sampling limit is exercised (restored in finally)
original_max = association_engine.max_pairs_per_run
association_engine.max_pairs_per_run = 10
try:
associations = await association_engine.process(memories)
# Should handle large memory sets without performance issues
# and limit the number of pairs processed
assert isinstance(associations, list)
finally:
# Restore original value
association_engine.max_pairs_per_run = original_max
@pytest.mark.asyncio
async def test_empty_memories_list(self, association_engine):
"""Test handling of empty or insufficient memories list."""
# Empty list
associations = await association_engine.process([])
assert associations == []
# Single memory (can't create associations)
single_memory = [Memory(
content="Single memory",
content_hash="single",
tags=["test"],
embedding=[0.1] * 320,
created_at=datetime.now().timestamp()
)]
associations = await association_engine.process(single_memory)
assert associations == []
@pytest.mark.asyncio
async def test_association_metadata_completeness(self, association_engine, sample_memories):
"""Test that association metadata contains all expected fields."""
memories = sample_memories[:3]
associations = await association_engine.process(memories)
for assoc in associations:
# Check basic fields
assert len(assoc.source_memory_hashes) == 2
assert isinstance(assoc.similarity_score, float)
assert isinstance(assoc.connection_type, str)
assert assoc.discovery_method == "creative_association"
assert isinstance(assoc.discovery_date, datetime)
# Check metadata fields
assert 'shared_concepts' in assoc.metadata
assert 'confidence_score' in assoc.metadata
assert 'analysis_version' in assoc.metadata
assert isinstance(assoc.metadata['shared_concepts'], list)
assert isinstance(assoc.metadata['confidence_score'], float)