
MCP Standards

by airmcp-com
test_feedback_loops.py (11.5 kB)
"""Integration tests for self-learning and feedback loops Tests pattern detection, learning from corrections, and automatic improvement. Target: 85%+ coverage for feedback mechanisms """ import pytest import asyncio @pytest.mark.integration class TestPatternLearning: """Test automatic pattern learning from corrections""" async def test_learn_from_single_correction(self, memory_server): """Test system captures single correction""" result = await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": "src/app.py", "old_string": "print('debug')", "new_string": "logger.debug('debug')" }, result={"success": True} ) assert result["success"] is True async def test_learn_from_repeated_corrections(self, memory_server): """Test pattern detection from 3+ identical corrections""" # Simulate 3 identical corrections for i in range(3): await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": f"src/file_{i}.py", "old_string": f"print('log {i}')", "new_string": f"logger.info('log {i}')" }, result={"success": True} ) # Pattern should be detected # Check if pattern extractor captured it patterns = memory_server.pattern_extractor.extract_patterns() # May or may not detect depending on threshold assert isinstance(patterns, list) async def test_pattern_confidence_scoring(self, memory_server): """Test confidence increases with repetition""" # First correction: low confidence await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": "src/test1.py", "old_string": "x = 1", "new_string": "x: int = 1" }, result={"success": True} ) # Add more corrections for i in range(5): await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": f"src/test{i+2}.py", "old_string": f"y = {i}", "new_string": f"y: int = {i}" }, result={"success": True} ) # Confidence should increase patterns = memory_server.pattern_extractor.extract_patterns() assert isinstance(patterns, list) async def test_category_classification(self, memory_server): """Test patterns are correctly categorized""" # Python logging pattern await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": "src/logger.py", "old_string": "print('error')", "new_string": "logger.error('error')" }, result={"success": True} ) # Type hint pattern await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": "src/types.py", "old_string": "def func(x):", "new_string": "def func(x: int) -> str:" }, result={"success": True} ) patterns = memory_server.pattern_extractor.extract_patterns() # Should categorize by type assert isinstance(patterns, list) async def test_project_specific_vs_global_patterns(self, memory_server): """Test distinction between project-specific and global patterns""" # Add project-specific pattern result = await memory_server._learn_preference( category="python-style", context="project-specific indentation", preference="Use 2 spaces for indentation", project_specific=True, project_path="/tmp/project1" ) # Add global pattern result2 = await memory_server._learn_preference( category="python-style", context="global standard", preference="Use type hints everywhere", project_specific=False, project_path="" ) assert result.get("success", False) or "success" in result assert result2.get("success", False) or "success" in result2 @pytest.mark.integration class TestFeedbackIntegration: """Test full feedback loop integration""" async def test_correction_to_claudemd_workflow(self, memory_server, tmp_path): """Test complete workflow: 
correction -> learning -> CLAUDE.md update""" # Step 1: Log corrections for i in range(3): await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": f"src/test_{i}.py", "old_string": f"var = {i}", "new_string": f"var: int = {i}" }, result={"success": True} ) await asyncio.sleep(0.01) # Step 2: Extract patterns patterns = memory_server.pattern_extractor.extract_patterns() # Step 3: Update CLAUDE.md claude_file = tmp_path / "CLAUDE.md" result = await memory_server._update_claudemd( str(claude_file), project_path=str(tmp_path), min_confidence=0.5 ) assert "success" in result async def test_learned_preferences_retrieval(self, memory_server): """Test retrieving learned preferences""" # Add some preferences await memory_server._learn_preference( category="python-logging", context="error handling", preference="Use logger instead of print", project_specific=False, project_path="" ) # Retrieve preferences prefs = await memory_server._get_learned_preferences( category="python-logging", min_confidence=0.5 ) assert prefs.get("success", False) or isinstance(prefs, dict) async def test_preference_confidence_filtering(self, memory_server): """Test filtering preferences by confidence threshold""" # Add low confidence preference await memory_server._learn_preference( category="test-category", context="test", preference="Low confidence rule", project_specific=False, project_path="", confidence=0.3 ) # Query with high confidence threshold prefs_high = await memory_server._get_learned_preferences( category="test-category", min_confidence=0.8 ) # Query with low confidence threshold prefs_low = await memory_server._get_learned_preferences( category="test-category", min_confidence=0.2 ) # High threshold should return fewer results assert isinstance(prefs_high, dict) assert isinstance(prefs_low, dict) @pytest.mark.integration class TestAdaptiveLearning: """Test adaptive learning and improvement""" async def test_pattern_promotion(self, memory_server): """Test patterns get promoted after repeated validation""" pattern_data = { "pattern": "use_type_hints", "context": "function parameters", "confidence": 0.5 } # Simulate validation successes for i in range(5): await memory_server._log_tool_execution( tool_name="validate_pattern", args=pattern_data, result={"success": True, "validated": True} ) # Confidence should increase # This is implicit in the pattern learning logic async def test_false_positive_detection(self, memory_server): """Test detection of false positive patterns""" # Add a pattern await memory_server._learn_preference( category="false-positive", context="test", preference="This might be wrong", project_specific=False, project_path="" ) # Simulate rejections/corrections of this pattern for i in range(3): await memory_server._log_tool_execution( tool_name="revert_change", args={"pattern": "This might be wrong"}, result={"success": True} ) # System should lower confidence or mark as false positive # This depends on implementation details async def test_context_aware_learning(self, memory_server): """Test patterns are learned with context awareness""" # Backend API context await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": "backend/api.py", "old_string": "return data", "new_string": "return JSONResponse(data)" }, result={"success": True} ) # Frontend context (different pattern) await memory_server._log_tool_execution( tool_name="edit_file", args={ "file_path": "frontend/component.tsx", "old_string": "return data", "new_string": "return 
<div>{data}</div>" }, result={"success": True} ) # Should recognize context differences patterns = memory_server.pattern_extractor.extract_patterns() assert isinstance(patterns, list) @pytest.mark.integration @pytest.mark.slow class TestLongTermLearning: """Test long-term learning and memory""" async def test_pattern_persistence(self, memory_server): """Test learned patterns persist across sessions""" # Learn a pattern await memory_server._learn_preference( category="persistent", context="test", preference="This should persist", project_specific=False, project_path="" ) # Simulate new session (recreate server with same DB) from claude_memory.server import ClaudeMemoryMCP new_server = ClaudeMemoryMCP(db_path=str(memory_server.db_path)) # Should still be able to retrieve prefs = await new_server._get_learned_preferences( category="persistent", min_confidence=0.1 ) assert isinstance(prefs, dict) async def test_knowledge_accumulation(self, memory_server): """Test knowledge accumulates over time""" import sqlite3 # Get initial count with sqlite3.connect(memory_server.db_path) as conn: initial_count = conn.execute("SELECT COUNT(*) FROM tool_logs").fetchone()[0] # Add 100 corrections for i in range(100): await memory_server._log_tool_execution( tool_name="edit_file", args={"file": f"test_{i}.py", "change": f"change_{i}"}, result={"success": True} ) # Verify accumulation with sqlite3.connect(memory_server.db_path) as conn: final_count = conn.execute("SELECT COUNT(*) FROM tool_logs").fetchone()[0] assert final_count > initial_count
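
The suite references a memory_server fixture that is not defined in this file; it presumably lives in an accompanying conftest.py. Below is a minimal sketch of what that fixture could look like, inferred from test_pattern_persistence, which constructs ClaudeMemoryMCP(db_path=...) and reads memory_server.db_path. The filename and wiring shown are assumptions for illustration, not the project's actual conftest.

# conftest.py (hypothetical sketch, not part of the repository listing above)
import pytest

from claude_memory.server import ClaudeMemoryMCP


@pytest.fixture
def memory_server(tmp_path):
    """Provide a ClaudeMemoryMCP backed by a throwaway SQLite database."""
    # A fresh database under pytest's tmp_path keeps each test isolated, so
    # counts in test_knowledge_accumulation start from a known-empty tool_logs table.
    return ClaudeMemoryMCP(db_path=str(tmp_path / "memory.db"))

Because the test methods are declared async def without explicit asyncio markers, the suite also appears to assume pytest-asyncio running in auto mode (asyncio_mode = "auto" in the pytest configuration, or an equivalent module-level pytestmark). With the integration and slow markers registered, the tests can be selected with pytest -m integration, and the long-term tests excluded with pytest -m "integration and not slow".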
