
R Econometrics MCP Server

MIT License
test_mcp_protocol_validation.py (12.1 kB)
#!/usr/bin/env python3
"""
MCP protocol validation tests for all RMCP tools.

Tests the complete MCP protocol integration flow for each tool with mocked R
responses, ensuring all tool paths work end-to-end without requiring an R
environment.
"""
import sys
from contextlib import ExitStack
from pathlib import Path
from unittest.mock import patch

import pytest
import pytest_asyncio

# rmcp package installed via pip install -e .
from rmcp.core.context import Context
from rmcp.core.server import create_server
from rmcp.registries.tools import register_tool_functions

# Import all tools
from rmcp.tools.descriptive import frequency_table, outlier_detection, summary_stats
from rmcp.tools.econometrics import instrumental_variables, panel_regression, var_model
from rmcp.tools.fileops import (
    data_info,
    filter_data,
    read_csv,
    read_excel,
    read_json,
    write_csv,
    write_excel,
    write_json,
)
from rmcp.tools.formula_builder import build_formula, validate_formula
from rmcp.tools.helpers import load_example, suggest_fix, validate_data
from rmcp.tools.machine_learning import decision_tree, kmeans_clustering, random_forest
from rmcp.tools.regression import (
    correlation_analysis,
    linear_model,
    logistic_regression,
)
from rmcp.tools.statistical_tests import (
    anova,
    chi_square_test,
    normality_test,
    t_test,
)
from rmcp.tools.timeseries import (
    arima_model,
    decompose_timeseries,
    stationarity_test,
)
from rmcp.tools.transforms import (
    difference,
    lag_lead,
    standardize,
    winsorize,
)
from rmcp.tools.visualization import (
    boxplot,
    correlation_heatmap,
    histogram,
    regression_plot,
    scatter_plot,
    time_series_plot,
)
from tests.utils import extract_json_content, extract_text_summary

# Test data sets - minimal viable data for each category
BASIC_DATA = {"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}
CATEGORICAL_DATA = {
    "category": ["A", "B", "A", "B", "A"],
    "value": [10, 20, 15, 25, 12],
}
TIME_SERIES_DATA = {"values": [100, 102, 98, 105, 108, 110, 95, 100, 103, 107]}
PANEL_DATA = {
    "id": [1, 1, 2, 2, 3, 3],
    "time": [1, 2, 1, 2, 1, 2],
    "y": [10, 12, 15, 18, 20, 22],
    "x": [5, 6, 7, 8, 9, 10],
}
BINARY_DATA = {"outcome": [0, 1, 0, 1, 0], "predictor": [1, 3, 2, 4, 1]}

# Mock R execution results for each tool category
MOCK_RESPONSES = {
    # Regression tools
    "linear_model": {
        "coefficients": {"(Intercept)": 0.0, "x": 2.0},
        "r_squared": 0.99,
        "p_values": {"(Intercept)": 0.05, "x": 0.001},
        "residuals": [0.1, -0.1, 0.0, 0.1, -0.1],
        "n_obs": 5,
        "degrees_freedom": 3,
        "formula": "y ~ x",
        "variables": ["x", "y"],
    },
    "correlation_analysis": {
        "correlation_matrix": {"x": [1.0, 0.99], "y": [0.99, 1.0]},
        "p_values": {"x": [0.0, 0.001], "y": [0.001, 0.0]},
        "n_obs": 5,
        "method": "pearson",
        "variables": ["x", "y"],
    },
    "logistic_regression": {
        "coefficients": {"(Intercept)": -1.0, "predictor": 0.5},
        "odds_ratios": {"(Intercept)": 0.37, "predictor": 1.65},
        "p_values": {"(Intercept)": 0.1, "predictor": 0.05},
        "accuracy": 0.8,
        "deviance": 5.2,
        "aic": 7.4,
        "n_obs": 5,
    },
    # Statistical tests
    "t_test": {
        "statistic": 2.5,
        "p_value": 0.03,
        "confidence_interval": {"lower": 0.1, "upper": 3.9, "level": 0.95},
        "mean": 2.0,
        "alternative": "two.sided",
        "test_type": "one_sample",
        "variable": "x",
    },
    "anova": {
        "f_statistic": 15.2,
        "p_value": 0.01,
        "df_between": 2,
        "df_within": 12,
        "sum_sq": [100, 50],
        "mean_sq": [50, 4.2],
        "anova_table": {"p_value": {"1": 0.01}},
        "formula": "y ~ group",
    },
    "chi_square_test": {
        "statistic": 8.5,
        "p_value": 0.02,
        "df": 2,
        "test_type": "independence",
        "observed": [[10, 5], [8, 12]],
        "expected": [[9, 6], [9, 11]],
        "expected_frequencies": [[9, 6], [9, 11]],
    },
    "normality_test": {
        "test": "shapiro",
        "statistic": 0.95,
        "p_value": 0.7,
        "is_normal": True,
        "interpretation": "Data appears normally distributed",
        "test_name": "Shapiro-Wilk",
        "variable": "x",
    },
    # Add other mock responses with required fields...
    "summary_stats": {
        "statistics": {
            "x": {"mean": 3.0, "sd": 1.58, "min": 1, "max": 5},
            "y": {"mean": 6.0, "sd": 3.16, "min": 2, "max": 10},
        },
        "n_obs": 5,
        "variables": ["x", "y"],
    },
    "outlier_detection": {
        "outliers": [5],
        "outlier_indices": [4],
        "method": "iqr",
        "threshold": 1.5,
        "n_outliers": 1,
        "variable": "x",
        "n_obs": 5,
    },
    "frequency_table": {
        "frequency_tables": {"category": {"A": 3, "B": 2}},
        "percentages": {"category": {"A": 60.0, "B": 40.0}},
        "n_total": 5,
        "variables": ["category"],
    },
}


@pytest_asyncio.fixture
async def test_server():
    """Create server with all tools registered."""
    server = create_server()
    # Register all tools exactly as in CLI
    register_tool_functions(
        server.tools,
        # Regression tools
        linear_model,
        correlation_analysis,
        logistic_regression,
        # Statistical tests
        t_test,
        anova,
        chi_square_test,
        normality_test,
        # Descriptive statistics
        summary_stats,
        outlier_detection,
        frequency_table,
        # Data transformations
        lag_lead,
        winsorize,
        difference,
        standardize,
        # Time series analysis
        arima_model,
        decompose_timeseries,
        stationarity_test,
        # Machine learning
        kmeans_clustering,
        decision_tree,
        random_forest,
        # Econometrics
        panel_regression,
        instrumental_variables,
        var_model,
        # Visualization
        scatter_plot,
        histogram,
        boxplot,
        time_series_plot,
        correlation_heatmap,
        regression_plot,
        # File operations
        read_csv,
        write_csv,
        data_info,
        filter_data,
        read_excel,
        read_json,
        write_excel,
        write_json,
        # Natural language tools
        build_formula,
        validate_formula,
        # Helper tools
        suggest_fix,
        validate_data,
        load_example,
    )
    return server


@pytest.fixture
def mock_r_patches():
    """Create mock patches for R execution."""
    patches = [
        patch(
            "rmcp.tools.statistical_tests.execute_r_script_async",
            side_effect=lambda r_script, params: MOCK_RESPONSES.get(
                params.get("tool_name", "generic"), {"status": "mocked"}
            ),
        ),
        patch(
            "rmcp.tools.descriptive.execute_r_script_async",
            side_effect=lambda r_script, params: MOCK_RESPONSES.get(
                params.get("tool_name", "generic"), {"status": "mocked"}
            ),
        ),
        patch(
            "rmcp.tools.regression.execute_r_script_async",
            side_effect=lambda r_script, params: MOCK_RESPONSES.get(
                params.get("tool_name", "generic"), {"status": "mocked"}
            ),
        ),
        # Add other patches as needed
    ]
    return patches


async def run_tool_protocol_test(server, tool_name, test_data, mock_patches):
    """Test an individual tool through the MCP protocol."""
    # Create MCP request
    request = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": tool_name,
            "arguments": {**test_data, "tool_name": tool_name},
        },
    }
    try:
        # Apply mock patches and execute
        with ExitStack() as stack:
            for patch_obj in mock_patches:
                stack.enter_context(patch_obj)
            response = await server.handle_request(request)

        # Validate response structure
        if "result" not in response:
            return False, f"No result in response: {response}"
        if "content" not in response["result"]:
            return False, f"No content in result: {response['result']}"
        content = response["result"]["content"]
        if not isinstance(content, list) or len(content) == 0:
            return False, f"Invalid content format: {content}"

        # Verify we get a human-readable summary
        summary = extract_text_summary(response)
        if not summary.strip():
            return False, "No human-readable summary returned"

        return True, "Protocol validation successful"
    except Exception as e:
        return False, f"Tool execution failed: {str(e)}"


# Individual test cases for different tool categories
@pytest.mark.asyncio
async def test_regression_tools_protocol(test_server, mock_r_patches):
    """Test regression tools MCP protocol compliance."""
    test_cases = [
        ("linear_model", {"data": BASIC_DATA, "formula": "y ~ x"}),
        ("correlation_analysis", {"data": BASIC_DATA, "variables": ["x", "y"]}),
        (
            "logistic_regression",
            {"data": BINARY_DATA, "formula": "outcome ~ predictor"},
        ),
    ]
    for tool_name, test_data in test_cases:
        success, result = await run_tool_protocol_test(
            test_server, tool_name, test_data, mock_r_patches
        )
        assert success, f"{tool_name} protocol test failed: {result}"


@pytest.mark.asyncio
async def test_statistical_tests_protocol(test_server, mock_r_patches):
    """Test statistical tests MCP protocol compliance."""
    test_cases = [
        ("t_test", {"data": BASIC_DATA, "variable": "x"}),
        ("anova", {"data": CATEGORICAL_DATA, "formula": "value ~ category"}),
        (
            "chi_square_test",
            {
                "data": CATEGORICAL_DATA,
                "test_type": "independence",
                "x": "category",
                "y": "value",
            },
        ),
        ("normality_test", {"data": BASIC_DATA, "variable": "x"}),
    ]
    for tool_name, test_data in test_cases:
        success, result = await run_tool_protocol_test(
            test_server, tool_name, test_data, mock_r_patches
        )
        assert success, f"{tool_name} protocol test failed: {result}"


@pytest.mark.asyncio
async def test_descriptive_stats_protocol(test_server, mock_r_patches):
    """Test descriptive statistics MCP protocol compliance."""
    test_cases = [
        ("summary_stats", {"data": BASIC_DATA, "variables": ["x", "y"]}),
        ("outlier_detection", {"data": BASIC_DATA, "variable": "x"}),
        ("frequency_table", {"data": CATEGORICAL_DATA, "variables": ["category"]}),
    ]
    for tool_name, test_data in test_cases:
        success, result = await run_tool_protocol_test(
            test_server, tool_name, test_data, mock_r_patches
        )
        assert success, f"{tool_name} protocol test failed: {result}"


# Add a comprehensive test that validates the overall protocol
@pytest.mark.asyncio
async def test_mcp_protocol_comprehensive_validation(test_server, mock_r_patches):
    """Comprehensive MCP protocol validation across all tool categories."""
    # Test server basic functionality
    context = Context.create("test", "test", test_server.lifespan_state)
    tools_list = await test_server.tools.list_tools(context)

    # Verify we have tools registered
    assert len(tools_list["tools"]) > 0, "No tools registered in server"

    # Test basic MCP protocol compliance with a simple tool
    success, result = await run_tool_protocol_test(
        test_server,
        "summary_stats",
        {"data": BASIC_DATA, "variables": ["x", "y"]},
        mock_r_patches,
    )
    assert success, f"Basic MCP protocol test failed: {result}"

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/finite-sample/rmcp'
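The same lookup can be scripted. Below is a minimal Python sketch using only the standard library, assuming the endpoint is publicly readable as the curl example implies; the response schema is not documented here, so the JSON is simply printed as-is.

import json
import urllib.request

# Query the Glama MCP directory API for the rmcp server entry.
url = "https://glama.ai/api/mcp/v1/servers/finite-sample/rmcp"
with urllib.request.urlopen(url) as resp:
    info = json.load(resp)

# Inspect whatever the API returns; the schema is not specified here.
print(json.dumps(info, indent=2))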

If you have feedback or need assistance with the MCP directory API, please join our Discord server.