Skip to main content
Glama
benchmark_server_caching.py (5.77 kB)
#!/usr/bin/env python3
"""
Benchmark production MCP server (server.py) caching performance.

Tests global caching implementation to measure performance improvement
from baseline ~1,810ms to target <400ms on cache hits.

Usage:
    python scripts/benchmarks/benchmark_server_caching.py
"""
import asyncio
import json
import sys
import time
from pathlib import Path

# Add src to path so the package imports resolve without installation
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))

from mcp_memory_service.server import MemoryServer
from mcp_memory_service import config


async def benchmark_server_caching():
    """Benchmark production MCP server caching performance.

    Runs repeated storage-initialization calls against a single
    MemoryServer instance (first call is a cache miss, subsequent calls
    are cache hits), prints a timing report, and exercises the
    get_cache_stats MCP tool.

    Returns:
        dict: success flag, first-call/cached timing stats (ms),
        improvement percentage, and overall cache hit rate.
    """
    print("=" * 80)
    print("PRODUCTION MCP SERVER CACHING PERFORMANCE BENCHMARK")
    print("=" * 80)
    print(f"Storage Backend: {config.STORAGE_BACKEND}")
    print(f"Database Path: {config.SQLITE_VEC_PATH}")
    print()

    # Create server instance
    server = MemoryServer()

    results = []
    num_calls = 10

    print(f"Running {num_calls} consecutive storage initialization calls...\n")

    for i in range(num_calls):
        # Reset storage flag to simulate fresh initialization check
        # (but cache will persist between calls)
        if i > 0:
            server._storage_initialized = False

        # perf_counter() is monotonic and high-resolution; time.time()
        # is wall-clock and can jump (NTP) mid-measurement.
        start = time.perf_counter()
        # Call the lazy initialization method
        await server._ensure_storage_initialized()
        duration_ms = (time.perf_counter() - start) * 1000

        results.append(duration_ms)
        call_type = "CACHE MISS" if i == 0 else "CACHE HIT"
        print(f"Call #{i+1:2d}: {duration_ms:7.2f}ms ({call_type})")

    # Import cache stats from server module
    from mcp_memory_service import server as server_module
    cache_stats = server_module._CACHE_STATS

    # Calculate statistics
    first_call = results[0]      # Cache miss
    cached_calls = results[1:]   # Cache hits
    avg_cached = sum(cached_calls) / len(cached_calls) if cached_calls else 0
    min_cached = min(cached_calls) if cached_calls else 0
    max_cached = max(cached_calls) if cached_calls else 0

    print()
    print("=" * 80)
    print("RESULTS")
    print("=" * 80)
    print(f"First Call (Cache Miss): {first_call:7.2f}ms")
    print(f"Cached Calls Average:    {avg_cached:7.2f}ms")
    print(f"Cached Calls Min:        {min_cached:7.2f}ms")
    print(f"Cached Calls Max:        {max_cached:7.2f}ms")
    print()

    # Calculate improvement; initialize explicitly so the value returned
    # below is well-defined even when there were no cache hits.
    improvement = 0
    if avg_cached > 0:
        improvement = ((first_call - avg_cached) / first_call) * 100
        speedup = first_call / avg_cached
        print(f"Performance Improvement: {improvement:.1f}%")
        print(f"Speedup Factor: {speedup:.2f}x faster")

    print()
    print("=" * 80)
    print("CACHE STATISTICS")
    print("=" * 80)
    print(f"Total Initialization Calls: {cache_stats['total_calls']}")
    print(f"Storage Cache Hits: {cache_stats['storage_hits']}")
    print(f"Storage Cache Misses: {cache_stats['storage_misses']}")
    print(f"Service Cache Hits: {cache_stats['service_hits']}")
    print(f"Service Cache Misses: {cache_stats['service_misses']}")

    storage_cache = server_module._STORAGE_CACHE
    service_cache = server_module._MEMORY_SERVICE_CACHE
    print(f"Storage Cache Size: {len(storage_cache)} instances")
    print(f"Service Cache Size: {len(service_cache)} instances")

    # Each initialization call performs two cache checks (storage + service)
    total_checks = cache_stats['total_calls'] * 2
    total_hits = cache_stats['storage_hits'] + cache_stats['service_hits']
    hit_rate = (total_hits / total_checks * 100) if total_checks > 0 else 0
    print(f"Overall Cache Hit Rate: {hit_rate:.1f}%")
    print()

    print("=" * 80)
    print("COMPARISON TO BASELINE")
    print("=" * 80)
    print("Baseline (no caching): 1,810ms per call")
    print(f"Optimized (cache miss): {first_call:7.2f}ms")
    print(f"Optimized (cache hit): {avg_cached:7.2f}ms")
    print()

    # Determine success
    target_cached_time = 400  # ms
    if avg_cached < target_cached_time:
        print(f"✅ SUCCESS: Cache hit average ({avg_cached:.2f}ms) is under target ({target_cached_time}ms)")
        success = True
    else:
        print(f"⚠️ PARTIAL: Cache hit average ({avg_cached:.2f}ms) exceeds target ({target_cached_time}ms)")
        print("   Note: Still a significant improvement over baseline!")
        success = avg_cached < 1000  # Consider <1s a success

    print()

    # Test get_cache_stats MCP tool
    print("=" * 80)
    print("TESTING get_cache_stats MCP TOOL")
    print("=" * 80)
    try:
        # Call with empty arguments dict (tool takes no parameters)
        result = await server.handle_get_cache_stats({})
        # Extract the actual stats from MCP response format (safely parse JSON)
        stats_result = json.loads(result[0].text) if result else {}
        print("✅ get_cache_stats tool works!")
        print(f"   Hit Rate: {stats_result.get('hit_rate', 'N/A')}%")
        print(f"   Message: {stats_result.get('message', 'N/A')}")
    except Exception as e:
        print(f"❌ get_cache_stats tool failed: {e}")

    print()
    return {
        "success": success,
        "first_call_ms": first_call,
        "avg_cached_ms": avg_cached,
        "min_cached_ms": min_cached,
        "max_cached_ms": max_cached,
        "improvement_pct": improvement,
        "cache_hit_rate": hit_rate
    }


if __name__ == "__main__":
    try:
        results = asyncio.run(benchmark_server_caching())
        # Exit code based on success
        sys.exit(0 if results["success"] else 1)
    except Exception as e:
        print(f"\n❌ Benchmark failed with error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(2)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/doobidoo/mcp-memory-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.