Boring Gemini

test_evaluation_advanced.py•12.4 KiB

""" Unit tests for boring.mcp.tools.evaluation - Advanced Evaluation Tools. Tests for: - boring_evaluation_metrics - boring_bias_report - boring_generate_rubric """ import shutil from unittest.mock import MagicMock, patch import pytest @pytest.fixture def temp_project(tmp_path): """Create temporary project directory.""" project = tmp_path / "project" project.mkdir() (project / ".boring.toml").write_text("[project]\nname = 'test'\n") (project / ".boring_memory").mkdir() return project class TestBoringEvaluationMetrics: """Tests for boring_evaluation_metrics tool.""" def test_rate_limited(self): """Test rate limiting.""" from boring.mcp.tools.evaluation import boring_evaluation_metrics with patch( "boring.mcp.tools.evaluation.check_rate_limit", return_value=(False, "Too fast"), ): result = boring_evaluation_metrics() assert "Rate limited" in result def test_no_project(self): """Test when no project found.""" from boring.mcp.tools.evaluation import boring_evaluation_metrics with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch("boring.mcp.tools.evaluation.detect_project_root", return_value=None), ): result = boring_evaluation_metrics() assert "No valid Boring project" in result def test_no_memory_dir(self, temp_project): """Test when no evaluation history exists.""" from boring.mcp.tools.evaluation import boring_evaluation_metrics # Remove memory dir shutil.rmtree(temp_project / ".boring_memory") with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.mcp.tools.evaluation.detect_project_root", return_value=temp_project, ), ): result = boring_evaluation_metrics(project_path=str(temp_project)) assert "No evaluation history found" in result def test_success_with_memory(self, temp_project): """Test successful metrics report.""" from boring.mcp.tools.evaluation import boring_evaluation_metrics with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( 
"boring.mcp.tools.evaluation.detect_project_root", return_value=temp_project, ), ): result = boring_evaluation_metrics(project_path=str(temp_project)) # Should return a formatted report assert "Evaluation Metrics" in result or "Evaluation Type" in result def test_handles_exception(self, temp_project): """Test error handling.""" from boring.mcp.tools.evaluation import boring_evaluation_metrics with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.mcp.tools.evaluation.detect_project_root", return_value=temp_project, ), patch( "boring.judge.metrics.generate_metrics_report", side_effect=Exception("Test error"), ), ): result = boring_evaluation_metrics(project_path=str(temp_project)) # May succeed if it uses the "no memory" path, or fail with error assert isinstance(result, str) class TestBoringBiasReport: """Tests for boring_bias_report tool.""" def test_rate_limited(self): """Test rate limiting.""" from boring.mcp.tools.evaluation import boring_bias_report with patch( "boring.mcp.tools.evaluation.check_rate_limit", return_value=(False, "Too fast"), ): result = boring_bias_report() assert "Rate limited" in result def test_no_project(self): """Test when no project found.""" from boring.mcp.tools.evaluation import boring_bias_report with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch("boring.mcp.tools.evaluation.detect_project_root", return_value=None), ): result = boring_bias_report() assert "No valid Boring project" in result def test_success(self, temp_project): """Test successful bias report.""" from boring.mcp.tools.evaluation import boring_bias_report mock_monitor = MagicMock() mock_monitor.get_bias_report.return_value = { "position_bias": 0.1, "length_bias": 0.05, "sample_count": 100, } with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.mcp.tools.evaluation.detect_project_root", return_value=temp_project, ), patch( 
"boring.judge.bias_monitor.get_bias_monitor", return_value=mock_monitor, ), patch( "boring.judge.bias_monitor.format_bias_report", return_value="# Bias Report\n\nLow bias detected.", ), ): result = boring_bias_report(days=30, project_path=str(temp_project)) assert "Bias Report" in result or "bias" in result.lower() def test_custom_days(self, temp_project): """Test custom days parameter.""" from boring.mcp.tools.evaluation import boring_bias_report mock_monitor = MagicMock() mock_monitor.get_bias_report.return_value = {} with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.mcp.tools.evaluation.detect_project_root", return_value=temp_project, ), patch( "boring.judge.bias_monitor.get_bias_monitor", return_value=mock_monitor, ), patch("boring.judge.bias_monitor.format_bias_report", return_value="OK"), ): boring_bias_report(days=7, project_path=str(temp_project)) mock_monitor.get_bias_report.assert_called_once_with(days=7) def test_handles_exception(self, temp_project): """Test error handling.""" from boring.mcp.tools.evaluation import boring_bias_report with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.mcp.tools.evaluation.detect_project_root", return_value=temp_project, ), patch( "boring.judge.bias_monitor.get_bias_monitor", side_effect=Exception("Test error"), ), ): result = boring_bias_report(project_path=str(temp_project)) assert "Error" in result class TestBoringGenerateRubric: """Tests for boring_generate_rubric tool.""" def test_rate_limited(self): """Test rate limiting.""" from boring.mcp.tools.evaluation import boring_generate_rubric with patch( "boring.mcp.tools.evaluation.check_rate_limit", return_value=(False, "Too fast"), ): result = boring_generate_rubric(name="Test") assert "Rate limited" in result def test_default_criteria_code_quality(self): """Test default criteria for code_quality domain.""" from boring.mcp.tools.evaluation import 
boring_generate_rubric mock_rubric = MagicMock() mock_rubric.name = "Test" with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.judge.rubric_generator.generate_rubric", return_value=mock_rubric, ) as mock_gen, patch( "boring.judge.rubric_generator.rubric_to_prompt", return_value="**Rubric Prompt**", ), ): result = boring_generate_rubric(name="Test", domain="code_quality") # Check that default criteria were used call_args = mock_gen.call_args assert "Readability" in call_args.kwargs["criteria_names"] assert "Documentation" in call_args.kwargs["criteria_names"] assert "Generated Rubric" in result def test_custom_criteria(self): """Test custom criteria parameter.""" from boring.mcp.tools.evaluation import boring_generate_rubric mock_rubric = MagicMock() with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.judge.rubric_generator.generate_rubric", return_value=mock_rubric, ) as mock_gen, patch("boring.judge.rubric_generator.rubric_to_prompt", return_value="OK"), ): boring_generate_rubric( name="Custom", criteria="Speed, Accuracy, Safety", ) call_args = mock_gen.call_args assert "Speed" in call_args.kwargs["criteria_names"] assert "Accuracy" in call_args.kwargs["criteria_names"] assert "Safety" in call_args.kwargs["criteria_names"] def test_security_domain(self): """Test security domain default criteria.""" from boring.mcp.tools.evaluation import boring_generate_rubric mock_rubric = MagicMock() with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.judge.rubric_generator.generate_rubric", return_value=mock_rubric, ) as mock_gen, patch("boring.judge.rubric_generator.rubric_to_prompt", return_value="OK"), ): boring_generate_rubric(name="Security Check", domain="security") call_args = mock_gen.call_args assert "Secrets Management" in call_args.kwargs["criteria_names"] assert "Input Validation" in call_args.kwargs["criteria_names"] 
def test_strictness_levels(self): """Test different strictness levels.""" from boring.mcp.tools.evaluation import boring_generate_rubric mock_rubric = MagicMock() for strictness in ["lenient", "balanced", "strict"]: with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.judge.rubric_generator.generate_rubric", return_value=mock_rubric, ) as mock_gen, patch("boring.judge.rubric_generator.rubric_to_prompt", return_value="OK"), ): result = boring_generate_rubric(name="Test", strictness=strictness) call_args = mock_gen.call_args assert call_args.kwargs["strictness"] == strictness assert strictness in result def test_output_format(self): """Test output contains expected sections.""" from boring.mcp.tools.evaluation import boring_generate_rubric mock_rubric = MagicMock() with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.judge.rubric_generator.generate_rubric", return_value=mock_rubric, ), patch( "boring.judge.rubric_generator.rubric_to_prompt", return_value="Level 5: Excellent\nLevel 4: Good", ), ): result = boring_generate_rubric( name="API Quality", domain="code_quality", strictness="balanced", ) assert "📏 Generated Rubric" in result assert "API Quality" in result assert "code_quality" in result assert "balanced" in result assert "Level 5" in result def test_handles_exception(self): """Test error handling.""" from boring.mcp.tools.evaluation import boring_generate_rubric with ( patch("boring.mcp.tools.evaluation.check_rate_limit", return_value=(True, "")), patch( "boring.judge.rubric_generator.generate_rubric", side_effect=Exception("Test error"), ), ): result = boring_generate_rubric(name="Test") assert "Error" in result

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Boring206/boring-gemini'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_evaluation_advanced.py•12.4 KiB