"""CI Performance Regression Detection Tests.
This test module is designed to run in CI to catch performance regressions.
It compares current benchmark results against baseline thresholds defined in
baselines.json.
Reference: mcp_agent_mail-tty (CI performance regression detection task)
Usage:
# Run regression checks (uses existing benchmark results)
pytest tests/benchmarks/test_ci_regression.py -v
# Run with benchmarks first (generates fresh results)
RUN_BENCHMARKS=1 pytest tests/benchmarks/ -v -m "benchmark or ci_regression"
Environment variables:
CI_REGRESSION_STRICT: Set to "1" to fail on warnings too (default: errors only)
CI_REGRESSION_SKIP: Set to "1" to skip regression checks entirely
"""

from __future__ import annotations

import os

import pytest

from .regression import (
    assert_no_regressions,
    assert_query_count_lte,
    check_regressions,
    load_baselines,
    render_report,
    write_report_json,
)


def _should_skip_regression() -> bool:
    """Check if regression checks should be skipped."""
    return os.getenv("CI_REGRESSION_SKIP") == "1"


def _is_strict_mode() -> bool:
    """Check if strict mode is enabled (fail on warnings)."""
    return os.getenv("CI_REGRESSION_STRICT") == "1"


@pytest.mark.ci_regression
def test_performance_regression_check():
    """Main CI regression check; fails if any threshold is exceeded.

    This test:
    1. Loads baseline thresholds from baselines.json
    2. Loads the latest benchmark results from tests/benchmarks/results/
    3. Compares each metric against its thresholds
    4. Renders a Rich report to stdout for CI logs
    5. Writes a JSON artifact for CI download
    6. Fails if any violation (error) is found
    """
    if _should_skip_regression():
        pytest.skip("Skipped by CI_REGRESSION_SKIP=1")

    # This will raise AssertionError on violations.
    assert_no_regressions()
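

# A rough manual equivalent of assert_no_regressions(), assuming it composes the
# regression helpers imported above (test_regression_with_strict_mode below walks
# this flow explicitly):
#
#     report = check_regressions()
#     render_report(report)      # Rich table for CI logs
#     write_report_json(report)  # JSON artifact for CI download
#     assert not report.has_errors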


@pytest.mark.ci_regression
def test_baselines_file_valid():
    """Validate that baselines.json is well-formed and has required fields."""
    baselines = load_baselines()

    assert "version" in baselines, "baselines.json missing 'version' field"
    assert "thresholds" in baselines, "baselines.json missing 'thresholds' field"
    assert "baselines" in baselines, "baselines.json missing 'baselines' field"

    # Check that baseline configs have required fields.
    for tool, config in baselines.get("baselines", {}).items():
        assert "latency_ms" in config, f"Baseline for {tool} missing 'latency_ms'"
        assert "p95_max" in config["latency_ms"], (
            f"Baseline for {tool} missing 'latency_ms.p95_max'"
        )


@pytest.mark.ci_regression
def test_regression_report_json_artifact():
    """Test that the regression report JSON artifact is properly generated."""
    if _should_skip_regression():
        pytest.skip("Skipped by CI_REGRESSION_SKIP=1")

    report = check_regressions()
    json_path = write_report_json(report)

    assert json_path.exists(), f"JSON artifact not created at {json_path}"

    import json

    content = json.loads(json_path.read_text())
    assert "status" in content
    assert "violations" in content
    assert "warnings" in content
    assert "summary" in content


@pytest.mark.ci_regression
def test_regression_with_strict_mode():
    """Test regression check in strict mode (fails on warnings too).

    Only runs if CI_REGRESSION_STRICT=1 is set.
    """
    if not _is_strict_mode():
        pytest.skip("Strict mode not enabled (set CI_REGRESSION_STRICT=1)")
    if _should_skip_regression():
        pytest.skip("Skipped by CI_REGRESSION_SKIP=1")

    report = check_regressions()
    render_report(report)
    write_report_json(report)

    if report.has_warnings or report.has_errors:
        raise AssertionError(
            f"Strict mode: {len(report.violations)} error(s), "
            f"{len(report.warnings)} warning(s) found"
        )
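

# Example CI invocation for strict mode (command-line flags are illustrative;
# adapt the marker/path selection to your pipeline):
#
#     CI_REGRESSION_STRICT=1 pytest tests/benchmarks/test_ci_regression.py -v -m ci_regression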


# Query count assertion tests
class TestQueryCountAssertions:
    """Test query count assertion utilities."""

    def test_query_count_assertion_passes_under_threshold(self):
        """Query count assertion should pass when under threshold."""
        # Simulate a passing check
        assert_query_count_lte("test_op", max_queries=5, actual=3)

    def test_query_count_assertion_fails_over_threshold(self):
        """Query count assertion should fail when over threshold."""
        with pytest.raises(AssertionError) as exc_info:
            assert_query_count_lte("test_op", max_queries=2, actual=5)

        assert "test_op" in str(exc_info.value)
        assert "expected <= 2" in str(exc_info.value)
        assert "got 5" in str(exc_info.value)


# Integration tests that run actual benchmarks with query counting
class TestQueryCountIntegration:
    """Integration tests for query counting during actual operations.

    These tests run actual MCP tool calls and verify query counts
    stay within expected bounds.
    """

    @pytest.mark.asyncio
    @pytest.mark.ci_regression
    async def test_send_message_query_count(self, isolated_env):
        """Verify send_message stays within query count threshold."""
        from fastmcp import Client
        from mcp_agent_mail.app import build_mcp_server
        from mcp_agent_mail.db import track_queries

        server = build_mcp_server()
        max_queries = 5  # From baselines.json

        async with Client(server) as client:
            project_key = "/ci-query-test-send"
            await client.call_tool("ensure_project", {"human_key": project_key})

            agent_result = await client.call_tool(
                "create_agent_identity",
                {
                    "project_key": project_key,
                    "program": "ci-test",
                    "model": "test",
                },
            )
            agent_name = agent_result.data.get("name")

            # Track queries during send_message
            with track_queries() as tracker:
                await client.call_tool(
                    "send_message",
                    {
                        "project_key": project_key,
                        "sender_name": agent_name,
                        "to": [agent_name],
                        "subject": "CI Query Count Test",
                        "body_md": "Testing query count for CI regression.",
                    },
                )

            assert_query_count_lte("send_message", max_queries, tracker.total)

    @pytest.mark.asyncio
    @pytest.mark.ci_regression
    async def test_fetch_inbox_query_count(self, isolated_env):
        """Verify fetch_inbox stays within query count threshold."""
        from fastmcp import Client
        from mcp_agent_mail.app import build_mcp_server
        from mcp_agent_mail.db import track_queries

        server = build_mcp_server()
        max_queries = 3  # From baselines.json

        async with Client(server) as client:
            project_key = "/ci-query-test-inbox"
            await client.call_tool("ensure_project", {"human_key": project_key})

            agent_result = await client.call_tool(
                "create_agent_identity",
                {
                    "project_key": project_key,
                    "program": "ci-test",
                    "model": "test",
                },
            )
            agent_name = agent_result.data.get("name")

            # Send a few messages first
            for i in range(5):
                await client.call_tool(
                    "send_message",
                    {
                        "project_key": project_key,
                        "sender_name": agent_name,
                        "to": [agent_name],
                        "subject": f"Message {i}",
                        "body_md": f"Content {i}",
                    },
                )

            # Track queries during fetch_inbox
            with track_queries() as tracker:
                await client.call_tool(
                    "fetch_inbox",
                    {
                        "project_key": project_key,
                        "agent_name": agent_name,
                        "limit": 20,
                    },
                )

            assert_query_count_lte("fetch_inbox", max_queries, tracker.total)

    @pytest.mark.asyncio
    @pytest.mark.ci_regression
    async def test_list_outbox_query_count(self, isolated_env):
        """Verify list_outbox stays within query count threshold."""
        from fastmcp import Client
        from mcp_agent_mail.app import build_mcp_server
        from mcp_agent_mail.db import track_queries

        server = build_mcp_server()
        max_queries = 3  # From baselines.json

        async with Client(server) as client:
            project_key = "/ci-query-test-outbox"
            await client.call_tool("ensure_project", {"human_key": project_key})

            agent_result = await client.call_tool(
                "create_agent_identity",
                {
                    "project_key": project_key,
                    "program": "ci-test",
                    "model": "test",
                },
            )
            agent_name = agent_result.data.get("name")

            # Send a few messages first
            for i in range(5):
                await client.call_tool(
                    "send_message",
                    {
                        "project_key": project_key,
                        "sender_name": agent_name,
                        "to": [agent_name],
                        "subject": f"Outbox test {i}",
                        "body_md": f"Content {i}",
                    },
                )

            # Track queries while reading the outbox resource (the list_outbox
            # equivalent, exposed via a resource URI rather than a tool call)
            with track_queries() as tracker:
                resource_uri = f"resource://outbox/{agent_name}?project={project_key}&limit=20"
                await client.read_resource(resource_uri)

            assert_query_count_lte("list_outbox", max_queries, tracker.total)