
Codebase MCP Server

by Ravenight13
parse_baseline.py (10.6 kB)
#!/usr/bin/env python3
"""Parse pytest-benchmark baseline results for performance analysis.

Extracts key metrics from pytest-benchmark JSON output and displays them
in a human-readable format with target comparisons.

Usage:
    python scripts/parse_baseline.py baseline-results.json

Constitutional Compliance:
- Principle VIII: Type Safety (mypy --strict compliance, Pydantic models)
- Principle V: Production Quality (comprehensive error handling)
"""

from __future__ import annotations

import json
import sys
from decimal import Decimal
from pathlib import Path
from typing import Any, Final

from pydantic import BaseModel, Field, ValidationError

# ==============================================================================
# Constants
# ==============================================================================

# Performance targets from constitutional principles
INDEXING_TARGET_SECONDS: Final[Decimal] = Decimal("60.0")
SEARCH_P50_TARGET_MS: Final[Decimal] = Decimal("250.0")
SEARCH_P95_TARGET_MS: Final[Decimal] = Decimal("500.0")
SEARCH_P99_TARGET_MS: Final[Decimal] = Decimal("750.0")

# ==============================================================================
# Pydantic Models for JSON Parsing
# ==============================================================================


class BenchmarkStats(BaseModel):
    """Statistics from a pytest-benchmark run.

    Fields match pytest-benchmark JSON output structure.
    """

    min: float = Field(description="Minimum execution time in seconds")
    max: float = Field(description="Maximum execution time in seconds")
    mean: float = Field(description="Mean execution time in seconds")
    median: float = Field(description="Median execution time in seconds")
    stddev: float = Field(description="Standard deviation in seconds")
    q1: float = Field(description="First quartile (25th percentile) in seconds")
    q3: float = Field(description="Third quartile (75th percentile) in seconds")
    iqr: float = Field(description="Interquartile range in seconds")

    # Percentiles (may be missing in some output formats)
    p50: float | None = Field(None, description="50th percentile (median) in seconds")
    p95: float | None = Field(None, description="95th percentile in seconds")
    p99: float | None = Field(None, description="99th percentile in seconds")


class BenchmarkResult(BaseModel):
    """Individual benchmark test result.

    Represents a single benchmark from pytest-benchmark output.
    """

    name: str = Field(description="Test name (e.g., test_indexing_baseline)")
    fullname: str = Field(description="Full test path")
    stats: BenchmarkStats = Field(description="Statistical measurements")
    rounds: int = Field(description="Number of benchmark rounds executed")


class BenchmarkMachineInfo(BaseModel):
    """Machine information from benchmark run.

    Captures environment context for reproducibility.
    """

    node: str = Field(description="Machine hostname")
    processor: str = Field(description="CPU model")
    python_version: str = Field(description="Python version string")


class BenchmarkReport(BaseModel):
    """Complete pytest-benchmark JSON report.

    Top-level structure of benchmark output file.
    """

    machine_info: BenchmarkMachineInfo = Field(description="Environment information")
    benchmarks: list[BenchmarkResult] = Field(description="List of benchmark results")


# ==============================================================================
# Metric Extraction
# ==============================================================================


def extract_indexing_metrics(report: BenchmarkReport) -> BenchmarkResult | None:
    """Extract indexing benchmark metrics.

    Args:
        report: Parsed benchmark report

    Returns:
        Indexing benchmark result, or None if not found
    """
    for benchmark in report.benchmarks:
        if "indexing" in benchmark.name.lower():
            return benchmark
    return None


def extract_search_metrics(report: BenchmarkReport) -> BenchmarkResult | None:
    """Extract search benchmark metrics.

    Args:
        report: Parsed benchmark report

    Returns:
        Search benchmark result, or None if not found
    """
    for benchmark in report.benchmarks:
        if "search" in benchmark.name.lower():
            return benchmark
    return None


# ==============================================================================
# Display Functions
# ==============================================================================


def format_seconds(seconds: float) -> str:
    """Format seconds with appropriate precision.

    Args:
        seconds: Time in seconds

    Returns:
        Formatted string with units
    """
    if seconds < 1.0:
        return f"{seconds * 1000:.2f}ms"
    return f"{seconds:.2f}s"


def format_comparison(actual: Decimal, target: Decimal) -> str:
    """Format performance comparison with target.

    Args:
        actual: Actual measured value
        target: Target performance value

    Returns:
        Comparison string with pass/fail indicator
    """
    percentage = (actual / target) * 100
    if actual <= target:
        return f"✓ PASS ({percentage:.1f}% of target)"
    return f"✗ FAIL ({percentage:.1f}% of target)"


def display_indexing_metrics(benchmark: BenchmarkResult) -> None:
    """Display indexing performance metrics.

    Args:
        benchmark: Indexing benchmark result
    """
    print("\n" + "=" * 70)
    print("INDEXING PERFORMANCE BASELINE")
    print("=" * 70)

    mean_seconds = Decimal(str(benchmark.stats.mean))
    median_seconds = Decimal(str(benchmark.stats.median))

    print(f"\nTest: {benchmark.name}")
    print(f"Rounds: {benchmark.rounds}")
    print("\nTiming Statistics:")
    print(f"  Mean:   {format_seconds(float(mean_seconds))}")
    print(f"  Median: {format_seconds(float(median_seconds))}")
    print(f"  Min:    {format_seconds(benchmark.stats.min)}")
    print(f"  Max:    {format_seconds(benchmark.stats.max)}")
    print(f"  StdDev: {format_seconds(benchmark.stats.stddev)}")
    print("\nTarget Comparison:")
    print(f"  Target: {format_seconds(float(INDEXING_TARGET_SECONDS))}")
    print(f"  Result: {format_comparison(mean_seconds, INDEXING_TARGET_SECONDS)}")


def display_search_metrics(benchmark: BenchmarkResult) -> None:
    """Display search performance metrics.

    Args:
        benchmark: Search benchmark result
    """
    print("\n" + "=" * 70)
    print("SEARCH PERFORMANCE BASELINE")
    print("=" * 70)

    # Prefer explicit percentiles when present; otherwise fall back to
    # median (P50), Q3 (P95), and max (P99) as conservative approximations.
    p50_ms = (
        Decimal(str(benchmark.stats.p50 * 1000))
        if benchmark.stats.p50
        else Decimal(str(benchmark.stats.median * 1000))
    )
    p95_ms = (
        Decimal(str(benchmark.stats.p95 * 1000))
        if benchmark.stats.p95
        else Decimal(str(benchmark.stats.q3 * 1000))
    )
    p99_ms = (
        Decimal(str(benchmark.stats.p99 * 1000))
        if benchmark.stats.p99
        else Decimal(str(benchmark.stats.max * 1000))
    )

    print(f"\nTest: {benchmark.name}")
    print(f"Rounds: {benchmark.rounds}")
    print("\nLatency Statistics:")
    print(f"  Mean:   {format_seconds(benchmark.stats.mean)}")
    print(f"  Median: {format_seconds(benchmark.stats.median)}")
    print(f"  Min:    {format_seconds(benchmark.stats.min)}")
    print(f"  Max:    {format_seconds(benchmark.stats.max)}")
    print("\nPercentile Analysis:")
    print(f"  P50: {float(p50_ms):.2f}ms")
    print(f"  P95: {float(p95_ms):.2f}ms")
    print(f"  P99: {float(p99_ms):.2f}ms")
    print("\nTarget Comparisons:")
    print(f"  P50 Target: {float(SEARCH_P50_TARGET_MS):.2f}ms")
    print(f"  Result:     {format_comparison(p50_ms, SEARCH_P50_TARGET_MS)}")
    print(f"  P95 Target: {float(SEARCH_P95_TARGET_MS):.2f}ms")
    print(f"  Result:     {format_comparison(p95_ms, SEARCH_P95_TARGET_MS)}")


def display_environment(machine_info: BenchmarkMachineInfo) -> None:
    """Display benchmark environment information.

    Args:
        machine_info: Machine information from benchmark run
    """
    print("\n" + "=" * 70)
    print("ENVIRONMENT INFORMATION")
    print("=" * 70)
    print(f"\nMachine: {machine_info.node}")
    print(f"CPU: {machine_info.processor}")
    print(f"Python: {machine_info.python_version}")


# ==============================================================================
# Main Entry Point
# ==============================================================================


def parse_baseline_file(file_path: Path) -> int:
    """Parse and display baseline results from JSON file.

    Args:
        file_path: Path to pytest-benchmark JSON output

    Returns:
        Exit code (0 for success, 1 for failure)
    """
    try:
        # Read and parse JSON
        json_data: dict[str, Any] = json.loads(file_path.read_text(encoding="utf-8"))
        report = BenchmarkReport.model_validate(json_data)

        # Display environment info
        display_environment(report.machine_info)

        # Extract and display indexing metrics
        indexing = extract_indexing_metrics(report)
        if indexing:
            display_indexing_metrics(indexing)
        else:
            print("\nWarning: No indexing benchmark found in results", file=sys.stderr)

        # Extract and display search metrics
        search = extract_search_metrics(report)
        if search:
            display_search_metrics(search)
        else:
            print("\nWarning: No search benchmark found in results", file=sys.stderr)

        print("\n" + "=" * 70)
        print()
        return 0

    except FileNotFoundError:
        print(f"Error: File not found: {file_path}", file=sys.stderr)
        return 1
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON: {e}", file=sys.stderr)
        return 1
    except ValidationError as e:
        print(f"Error: Invalid benchmark format: {e}", file=sys.stderr)
        return 1
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1


def main() -> int:
    """Main entry point.

    Returns:
        Exit code (0 for success, 1 for failure)
    """
    if len(sys.argv) != 2:
        print("Usage: python scripts/parse_baseline.py <baseline-results.json>", file=sys.stderr)
        print("\nParses pytest-benchmark JSON output and displays performance metrics.", file=sys.stderr)
        return 1

    file_path = Path(sys.argv[1])
    return parse_baseline_file(file_path)


if __name__ == "__main__":
    sys.exit(main())
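A real baseline file comes from pytest-benchmark's --benchmark-json=PATH option. For a quick smoke test of the parser without running the benchmark suite, the sketch below writes a synthetic payload shaped to the Pydantic models declared above and feeds it to the script. The hostname, test path, and every timing number are hypothetical, and the script is assumed to be reachable at scripts/parse_baseline.py from the working directory.

#!/usr/bin/env python3
"""Smoke-test parse_baseline.py with a synthetic payload (illustrative values only)."""

import json
import subprocess
import sys
from pathlib import Path

# Minimal payload matching the fields the Pydantic models declare.
# All names, paths, and numbers are made up for illustration.
sample = {
    "machine_info": {
        "node": "ci-runner",            # hypothetical hostname
        "processor": "x86_64",
        "python_version": "3.12.0",
    },
    "benchmarks": [
        {
            "name": "test_indexing_baseline",
            "fullname": "tests/benchmarks/test_baseline.py::test_indexing_baseline",
            "rounds": 5,
            "stats": {
                "min": 42.0, "max": 55.0, "mean": 48.0, "median": 47.5,
                "stddev": 4.2, "q1": 45.0, "q3": 51.0, "iqr": 6.0,
            },
        },
    ],
}

out = Path("baseline-results.json")
out.write_text(json.dumps(sample), encoding="utf-8")

# Invoke the parser exactly as documented in its module docstring.
sys.exit(
    subprocess.run([sys.executable, "scripts/parse_baseline.py", str(out)]).returncode
)

Because the sample contains only an indexing benchmark, the parser prints the environment and indexing sections and warns on stderr that no search benchmark was found.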

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ravenight13/codebase-mcp'
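For scripted access, a minimal Python equivalent of the curl call above, using only the standard library (the response is printed as raw JSON, since its schema is not shown here):

import json
import urllib.request

URL = "https://glama.ai/api/mcp/v1/servers/Ravenight13/codebase-mcp"

# Fetch the server record and pretty-print whatever JSON comes back.
with urllib.request.urlopen(URL, timeout=10) as resp:
    print(json.dumps(json.load(resp), indent=2))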

If you have feedback or need assistance with the MCP directory API, please join our Discord server.