"""Performance benchmarks for MCP server and pipeline."""
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING
import pytest
if TYPE_CHECKING:
from pytest_benchmark.fixture import BenchmarkFixture
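
# NOTE: the `seeded_pipeline` fixture used below is assumed to be provided by
# the suite's conftest.py as a pipeline pre-populated with sample skills; it
# exposes retrieve() and clear_cache(), and its results carry latency_ms and
# cache_hit fields. It is not defined in this module.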


class TestStartupPerformance:
    """MCP server startup time tests."""

    def test_pipeline_startup_under_3_seconds(
        self, benchmark: BenchmarkFixture
    ) -> None:
        """Pipeline initialization should complete in under 3 seconds."""

        async def init_pipeline() -> None:
            # Reset global state to force fresh initialization
            from skill_retriever.mcp import server

            server._pipeline = None
            server._graph_store = None
            server._vector_store = None
            server._metadata_store = None
            await server.get_pipeline()

        def run_init() -> None:
            asyncio.run(init_pipeline())

        # Run the benchmark with a warmup round so one-off import costs
        # do not skew the measured rounds.
        benchmark.pedantic(
            run_init,
            rounds=3,
            warmup_rounds=1,
        )

        # Assert the startup SLA against the slowest measured round.
        max_time = benchmark.stats["max"]
        assert max_time < 3.0, f"Startup time {max_time:.2f}s exceeds 3s SLA"


class TestQueryLatency:
    """Query latency and throughput tests."""

    def test_simple_query_under_500ms(
        self, seeded_pipeline, benchmark: BenchmarkFixture
    ) -> None:
        """A simple query should complete in under 500ms."""
        benchmark(seeded_pipeline.retrieve, "authentication agent", top_k=5)

        max_latency = benchmark.stats["max"] * 1000  # Convert seconds to ms
        assert max_latency < 500, (
            f"Query latency {max_latency:.0f}ms exceeds 500ms SLA"
        )

    def test_complex_query_under_1000ms(
        self, seeded_pipeline, benchmark: BenchmarkFixture
    ) -> None:
        """A complex multi-hop query should complete in under 1000ms."""
        complex_query = (
            "I need JWT authentication with refresh tokens "
            "and OAuth integration for GitHub login"
        )
        benchmark(seeded_pipeline.retrieve, complex_query, top_k=10)

        max_latency = benchmark.stats["max"] * 1000  # Convert seconds to ms
        assert max_latency < 1000, (
            f"Complex query latency {max_latency:.0f}ms exceeds 1000ms SLA"
        )


class TestLoadStability:
    """Load testing and degradation detection."""

    def test_sequential_queries_no_degradation(self, seeded_pipeline) -> None:
        """Ten sequential queries should not show performance degradation."""
        queries = [
            "JWT authentication",
            "GitHub repository analysis",
            "LinkedIn post writer",
            "OAuth login flow",
            "MCP server setup",
            "debugging agent",
            "code review tool",
            "email processing",
            "data analysis",
            "security audit",
        ]

        # Clear the cache so every query is served cold.
        seeded_pipeline.clear_cache()

        latencies: list[float] = []
        for query in queries:
            result = seeded_pipeline.retrieve(query, top_k=5)
            latencies.append(result.latency_ms)

        # Compare the average latency of the first 5 queries with the last 5.
        first_half_avg = sum(latencies[:5]) / 5
        second_half_avg = sum(latencies[5:]) / 5

        print(f"\nLatencies: {[f'{latency:.1f}ms' for latency in latencies]}")
        print(f"First 5 avg: {first_half_avg:.1f}ms")
        print(f"Last 5 avg: {second_half_avg:.1f}ms")

        # Allow 50% degradation tolerance (generous, to absorb cold-start effects).
        degradation_ratio = (
            second_half_avg / first_half_avg if first_half_avg > 0 else 1.0
        )
        assert degradation_ratio < 1.5, (
            f"Performance degraded by {(degradation_ratio - 1) * 100:.0f}%"
        )

    def test_cached_queries_fast(self, seeded_pipeline) -> None:
        """Cached queries should be significantly faster than cold queries."""
        query = "authentication agent"

        # Cold query (first run after clearing the cache).
        seeded_pipeline.clear_cache()
        cold_result = seeded_pipeline.retrieve(query, top_k=5)
        cold_latency = cold_result.latency_ms

        # Warm query (should be served from the cache).
        warm_result = seeded_pipeline.retrieve(query, top_k=5)
        warm_latency = warm_result.latency_ms

        print(f"\nCold latency: {cold_latency:.1f}ms")
        print(f"Warm latency: {warm_latency:.1f}ms")

        assert warm_result.cache_hit, "Second query should be a cache hit"
        assert warm_latency < cold_latency * 0.5, (
            f"Cached query not significantly faster: "
            f"{warm_latency:.1f}ms vs {cold_latency:.1f}ms"
        )
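

# Usage sketch (assumes pytest-benchmark is installed; the file path below is
# hypothetical, substitute this module's real location):
#
#   pytest tests/benchmarks/test_performance.py --benchmark-only
#
# --benchmark-only collects only tests that use the `benchmark` fixture, so the
# TestLoadStability tests above are skipped under that flag; run the file
# without it to include them. --benchmark-autosave stores results that a later
# run can diff against with --benchmark-compare.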