
Katamari MCP Server

by ciphernaut
test_sandbox_integration_simple.py (11.1 kB)
""" Simple integration tests for sandbox execution without LLM dependency. """ import pytest import asyncio import sys from pathlib import Path # Add the project root to the path sys.path.insert(0, str(Path(__file__).parent.parent)) from katamari_mcp.acp.controller import ACPController class TestSandboxIntegrationSimple: """Test sandbox integration with predefined code.""" @pytest.fixture def acp_controller(self): """Create ACP controller instance.""" return ACPController() @pytest.mark.asyncio async def test_predefined_capability_execution(self, acp_controller): """Test executing a predefined capability in the sandbox.""" # Predefined capability code calculator_code = ''' def simple_calculator(a, b): """Simple calculator that adds two numbers.""" return a + b # Execute the function result = simple_calculator(parameters.get("a", 0), parameters.get("b", 0)) ''' # Validate the code validation_issues = await acp_controller._validate_code_in_sandbox(calculator_code) assert len(validation_issues) == 0, f"Validation failed: {validation_issues}" # Execute the capability execution_result = await acp_controller.sandbox.execute_capability( calculator_code, capability_name="simple_calculator", parameters={"a": 15, "b": 7} ) assert execution_result.success is True assert execution_result.data == 22 # 15 + 7 = 22 assert execution_result.execution_time > 0 @pytest.mark.asyncio async def test_data_processing_capability(self, acp_controller): """Test a data processing capability.""" data_processor_code = ''' import json def analyze_data(data): """Analyze a list of numbers and return statistics.""" if not isinstance(data, list): raise ValueError("Data must be a list") if len(data) == 0: return {"count": 0, "sum": 0, "average": 0, "min": 0, "max": 0} numeric_data = [x for x in data if isinstance(x, (int, float))] if not numeric_data: return {"count": len(data), "sum": 0, "average": 0, "min": 0, "max": 0} return { "count": len(numeric_data), "sum": sum(numeric_data), "average": sum(numeric_data) / len(numeric_data), "min": min(numeric_data), "max": max(numeric_data) } # Execute the analysis result = analyze_data(parameters.get("data", [])) ''' # Validate and execute validation_issues = await acp_controller._validate_code_in_sandbox(data_processor_code) assert len(validation_issues) == 0 execution_result = await acp_controller.sandbox.execute_capability( data_processor_code, capability_name="data_analyzer", parameters={"data": [5, 10, 15, 20, 25]} ) assert execution_result.success is True result_data = execution_result.data assert result_data["count"] == 5 assert result_data["sum"] == 75 assert result_data["average"] == 15.0 assert result_data["min"] == 5 assert result_data["max"] == 25 @pytest.mark.asyncio async def test_error_handling_capability(self, acp_controller): """Test a capability with error handling.""" safe_divider_code = ''' def safe_divide(numerator, denominator): """Safely divide two numbers with error handling.""" try: if denominator == 0: return {"success": False, "error": "Division by zero", "result": None} result = numerator / denominator return {"success": True, "error": None, "result": result} except Exception as e: return {"success": False, "error": str(e), "result": None} # Execute the function result = safe_divide( parameters.get("numerator", 0), parameters.get("denominator", 1) ) ''' # Validate validation_issues = await acp_controller._validate_code_in_sandbox(safe_divider_code) assert len(validation_issues) == 0 # Test normal case normal_result = await 
acp_controller.sandbox.execute_capability( safe_divider_code, capability_name="safe_divider", parameters={"numerator": 20, "denominator": 4} ) assert normal_result.success is True assert normal_result.data["success"] is True assert normal_result.data["result"] == 5.0 # Test error case error_result = await acp_controller.sandbox.execute_capability( safe_divider_code, capability_name="safe_divider", parameters={"numerator": 20, "denominator": 0} ) assert error_result.success is True # Capability itself handled the error assert error_result.data["success"] is False assert "Division by zero" in error_result.data["error"] @pytest.mark.asyncio async def test_restricted_imports_blocked(self, acp_controller): """Test that dangerous imports are properly blocked.""" malicious_code = ''' import subprocess result = "should not reach here" ''' # Should fail validation validation_issues = await acp_controller._validate_code_in_sandbox(malicious_code) assert len(validation_issues) > 0 assert any("subprocess" in issue for issue in validation_issues) # Should fail execution execution_result = await acp_controller.sandbox.execute_capability( malicious_code, capability_name="malicious_test" ) assert execution_result.success is False assert "ImportError" in execution_result.error or "not allowed" in execution_result.error.lower() @pytest.mark.asyncio async def test_heuristic_evaluation(self, acp_controller): """Test heuristic evaluation of capabilities.""" safe_capability = ''' def text_processor(text): """Process text and return basic statistics.""" if not isinstance(text, str): raise ValueError("Input must be a string") words = text.split() return { "word_count": len(words), "char_count": len(text), "char_count_no_spaces": len(text.replace(" ", "")) } result = text_processor(parameters.get("text", "")) ''' # Evaluate with heuristics heuristic_result = acp_controller.heuristic_engine.evaluate_capability( "text_processor", safe_capability, ["text", "utility", "safe"] ) assert heuristic_result["approved"] is True assert heuristic_result["risk_score"] < 0.5 # Should be low risk assert len(heuristic_result["applied_tags"]) > 0 # Validate in sandbox validation_issues = await acp_controller._validate_code_in_sandbox(safe_capability) assert len(validation_issues) == 0 # Execute execution_result = await acp_controller.sandbox.execute_capability( safe_capability, capability_name="text_processor", parameters={"text": "Hello world! 
This is a test."} ) assert execution_result.success is True result_data = execution_result.data assert result_data["word_count"] == 6 assert result_data["char_count"] == 29 assert result_data["char_count_no_spaces"] == 24 @pytest.mark.asyncio async def test_full_pipeline_without_llm(self, acp_controller): """Test the complete pipeline except LLM generation.""" # Step 1: Define capability need (simulated) capability_need = { "name": "number_validator", "description": "Validate if a number is within a specified range", "tags": ["validation", "utility"], "priority": "medium" } # Step 2: Use predefined code instead of LLM generation predefined_code = ''' def validate_range(number, min_val, max_val): """Validate if a number is within the specified range.""" try: num = float(number) if min_val <= num <= max_val: return { "valid": True, "number": num, "range": [min_val, max_val], "message": f"{num} is within range [{min_val}, {max_val}]" } else: return { "valid": False, "number": num, "range": [min_val, max_val], "message": f"{num} is outside range [{min_val}, {max_val}]" } except (ValueError, TypeError): return { "valid": False, "error": f"Invalid number: {number}", "range": [min_val, max_val] } # Execute validation result = validate_range( parameters.get("number", 0), parameters.get("min_val", 0), parameters.get("max_val", 100) ) ''' # Step 3: Validate with heuristics heuristic_result = acp_controller.heuristic_engine.evaluate_capability( capability_need["name"], predefined_code, capability_need["tags"] ) assert heuristic_result["approved"] is True assert heuristic_result["risk_score"] < 0.3 # Should be very low risk # Step 4: Validate in sandbox validation_issues = await acp_controller._validate_code_in_sandbox(predefined_code) assert len(validation_issues) == 0 # Step 5: Execute with test data test_cases = [ {"number": 50, "min_val": 0, "max_val": 100}, # Valid {"number": 150, "min_val": 0, "max_val": 100}, # Invalid (too high) {"number": -10, "min_val": 0, "max_val": 100}, # Invalid (too low) {"number": "invalid", "min_val": 0, "max_val": 100} # Invalid (not a number) ] for i, test_params in enumerate(test_cases): execution_result = await acp_controller.sandbox.execute_capability( predefined_code, capability_name=capability_need["name"], parameters=test_params ) assert execution_result.success is True result_data = execution_result.data if i == 0: # Valid case assert result_data["valid"] is True else: # Invalid cases assert result_data["valid"] is False # Step 6: Collect feedback (simulated) await acp_controller.feedback_collector.collect_execution_feedback( capability_name=capability_need["name"], execution_result=execution_result, user_satisfaction=4.5, execution_context={"test_cases": len(test_cases)} ) # Verify pipeline completed successfully assert execution_result.success is True assert execution_result.execution_time > 0 if __name__ == "__main__": pytest.main([__file__, "-v"])

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ciphernaut/katamari-mcp'
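
The same endpoint can be queried from Python with the standard library. This is a hedged sketch: the response schema is not documented here, so inspect the returned JSON rather than relying on specific fields.

# Python equivalent of the curl call above (stdlib only).
import json
import urllib.request

url = "https://glama.ai/api/mcp/v1/servers/ciphernaut/katamari-mcp"
with urllib.request.urlopen(url) as resp:
    server_info = json.load(resp)

print(json.dumps(server_info, indent=2))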

If you have feedback or need assistance with the MCP directory API, please join our Discord server.