DM20 Protocol

Overview Schema Related Servers Score Discussions

benchmarks.py•7.14 KiB

"""
Performance benchmarking for the Claudmaster AI DM system.

This module provides a framework for running performance benchmarks,
collecting metrics, and comparing results against target thresholds.
"""

from __future__ import annotations

import asyncio
import inspect
import math
import time
from dataclasses import dataclass, field
from typing import Callable


@dataclass
class BenchmarkResult:
    """Result of a single benchmark."""

    name: str  # Name the benchmark was registered under
    iterations: int  # Number of timed calls that were made
    times: list[float]  # Per-call durations in seconds, in execution order
    min_time: float  # Fastest call, seconds
    max_time: float  # Slowest call, seconds
    avg_time: float  # Arithmetic mean of ``times``, seconds
    p50: float  # Nearest-rank 50th percentile of ``times``
    p95: float  # Nearest-rank 95th percentile of ``times``
    p99: float  # Nearest-rank 99th percentile of ``times``


@dataclass
class BenchmarkSuite:
    """Results of a full benchmark run."""

    results: list[BenchmarkResult]  # One entry per benchmark, in run order
    total_time: float  # Wall-clock seconds spent running all benchmarks
    # Human-readable comparison notes
    target_comparison: list[str] = field(default_factory=list)


# Performance targets (seconds)
PERFORMANCE_TARGETS: dict[str, dict[str, float]] = {
    "player_action": {"p50": 3.0, "p95": 10.0, "p99": 15.0},
    "context_building": {"p50": 0.5, "p95": 1.5, "p99": 3.0},
    "agent_query": {"p50": 1.0, "p95": 3.0, "p99": 5.0},
    "state_update": {"p50": 0.1, "p95": 0.3, "p99": 0.5},
    "cache_lookup": {"p50": 0.005, "p95": 0.02, "p99": 0.05},
}


class PerformanceBenchmark:
    """
    Runs performance benchmarks for the Claudmaster system.

    This class manages benchmark registration, execution, and result
    analysis. Benchmarks can be sync or async functions and are run
    multiple times to collect statistical data.

    A benchmark counts as async whenever calling it returns an awaitable,
    so a plain lambda wrapping a coroutine call (as in the usage below) is
    awaited inside the timed region just like an ``async def`` function.

    Usage:
        benchmark = PerformanceBenchmark()

        # Register benchmarks
        benchmark.register_benchmark("cache_lookup", lambda: cache.get("key"))
        benchmark.register_benchmark("agent_query", lambda: agent.run(request))

        # Run all
        suite = await benchmark.run_all(iterations=100)

        # Compare to targets
        comparison = benchmark.compare_to_targets(suite)
        for line in comparison:
            print(line)
    """

    def __init__(self) -> None:
        """Initialize the benchmark runner."""
        # Insertion-ordered mapping: benchmark name -> zero-argument callable.
        self._benchmarks: dict[str, Callable] = {}

    def register_benchmark(self, name: str, func: Callable) -> None:
        """
        Register a benchmark function.

        The function can be sync or async. It will be called multiple times
        during benchmark execution. Registering an existing name replaces
        the previous function.

        Args:
            name: Name of the benchmark
            func: The function to benchmark (sync or async), called with no
                arguments
        """
        self._benchmarks[name] = func

    async def run_benchmark(
        self,
        name: str,
        iterations: int = 100,
    ) -> BenchmarkResult:
        """
        Run a single named benchmark.

        Args:
            name: Name of the benchmark to run
            iterations: Number of times to run the benchmark (default: 100)

        Returns:
            BenchmarkResult with timing statistics

        Raises:
            KeyError: If benchmark name is not registered
            ValueError: If iterations is less than 1 (no samples to
                summarize)
        """
        try:
            func = self._benchmarks[name]
        except KeyError:
            raise KeyError(f"Benchmark '{name}' not registered") from None

        # Without samples min()/max()/mean are undefined; fail with a clear
        # message instead of the opaque "min() arg is an empty sequence".
        if iterations < 1:
            raise ValueError(
                f"iterations must be at least 1, got {iterations}"
            )

        times: list[float] = []
        for _ in range(iterations):
            start = time.perf_counter()
            outcome = func()
            # Decide per call from the *result*, not the function object:
            # inspect.iscoroutinefunction() is False for a lambda or callable
            # object that merely returns a coroutine, which would leave the
            # coroutine un-awaited and time only its creation.
            if inspect.isawaitable(outcome):
                await outcome
            times.append(time.perf_counter() - start)

        # Compute statistics
        return BenchmarkResult(
            name=name,
            iterations=iterations,
            times=times,
            min_time=min(times),
            max_time=max(times),
            avg_time=sum(times) / len(times),
            p50=self._compute_percentile(times, 50),
            p95=self._compute_percentile(times, 95),
            p99=self._compute_percentile(times, 99),
        )

    async def run_all(self, iterations: int = 100) -> BenchmarkSuite:
        """
        Run all registered benchmarks.

        Benchmarks are run sequentially in registration order. The total
        time includes all benchmark execution time but not the target
        comparison.

        Args:
            iterations: Number of iterations per benchmark (default: 100)

        Returns:
            BenchmarkSuite with results from all benchmarks and the target
            comparison already filled in
        """
        suite_start = time.perf_counter()

        results: list[BenchmarkResult] = []
        for name in self._benchmarks:
            results.append(await self.run_benchmark(name, iterations))

        total_time = time.perf_counter() - suite_start

        # Build the suite once, then attach the comparison computed from it.
        suite = BenchmarkSuite(results=results, total_time=total_time)
        suite.target_comparison = self.compare_to_targets(suite)
        return suite

    def compare_to_targets(
        self,
        suite: BenchmarkSuite,
    ) -> list[str]:
        """
        Compare results to PERFORMANCE_TARGETS.

        Generates human-readable comparison messages like:
        - "✅ cache_lookup p95: 0.015s (target: 0.02s)"
        - "❌ agent_query p95: 4.5s (target: 3.0s)"

        A benchmark without an entry in PERFORMANCE_TARGETS yields a single
        informational line. A value equal to its target counts as a pass.

        Args:
            suite: The benchmark suite to compare

        Returns:
            List of comparison message strings
        """
        messages: list[str] = []

        for result in suite.results:
            targets = PERFORMANCE_TARGETS.get(result.name)
            if targets is None:
                # No targets defined for this benchmark
                messages.append(f"ℹ️ {result.name}: no targets defined")
                continue

            # Compare p50, p95, p99 (skipping any the target entry omits)
            for percentile_name in ("p50", "p95", "p99"):
                if percentile_name not in targets:
                    continue

                target_value = targets[percentile_name]
                actual_value = getattr(result, percentile_name)
                status = "✅" if actual_value <= target_value else "❌"

                messages.append(
                    f"{status} {result.name} {percentile_name}: "
                    f"{actual_value:.3f}s (target: {target_value}s)"
                )

        return messages

    @staticmethod
    def _compute_percentile(values: list[float], percentile: float) -> float:
        """
        Compute percentile from list of values (nearest-rank method).

        Args:
            values: List of numeric values (need not be sorted)
            percentile: Percentile to compute (0-100); out-of-range values
                are clamped to the smallest / largest element

        Returns:
            The percentile value, or 0.0 if values is empty
        """
        if not values:
            return 0.0

        sorted_values = sorted(values)
        count = len(sorted_values)

        # Nearest-rank index: ceil(percentile/100 * count) - 1.
        # Multiply before dividing: percentile / 100.0 is usually inexact
        # (0.07 * 100 == 7.000000000000001), which would push ceil() one
        # rank too high whenever the exact rank is a whole number.
        index = math.ceil(percentile * count / 100.0) - 1

        # Clamp to valid range
        index = max(0, min(index, count - 1))

        return sorted_values[index]


__all__ = [
    "BenchmarkResult",
    "BenchmarkSuite",
    "PERFORMANCE_TARGETS",
    "PerformanceBenchmark",
]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

benchmarks.py•7.14 KiB