Katamari MCP Server

by ciphernaut
test_acp_sandbox_integration.py (10.6 kB)
""" Integration tests for ACP Controller with Sandbox execution. Tests the full pipeline from LLM generation to safe execution. """ import pytest import asyncio import sys from pathlib import Path # Add the project root to the path sys.path.insert(0, str(Path(__file__).parent.parent)) from katamari_mcp.acp.controller import ACPController class TestACPSandboxIntegration: """Test ACP Controller integration with sandbox.""" @pytest.fixture def acp_controller(self): """Create ACP controller instance.""" return ACPController() @pytest.mark.asyncio async def test_simple_capability_generation_and_execution(self, acp_controller): """Test generating and executing a simple capability.""" # Define a simple capability request capability_request = { "name": "simple_calculator", "description": "A simple calculator that adds two numbers", "tags": ["math", "utility"], "proposed_code": None # Will be generated by LLM } # Generate code using LLM prompt = f"Generate a Python function for {capability_request['description']}. The function should be named '{capability_request['name']}' and handle {capability_request['description'].lower()}." generated_text = await acp_controller.llm_client.generate( prompt, max_tokens=1024, temperature=0.7 ) # Extract code from response generated_code = acp_controller._extract_code_from_response(generated_text, prompt) assert generated_code is not None assert len(generated_code) > 0 # Validate the generated code validation_issues = await acp_controller._validate_code_in_sandbox(generated_code) assert len(validation_issues) == 0, f"Validation failed: {validation_issues}" # Execute the capability execution_result = await acp_controller.sandbox.execute_capability( generated_code, capability_name=capability_request["name"], parameters={"a": 5, "b": 3} ) assert execution_result.success is True assert execution_result.data == 8 # 5 + 3 = 8 assert execution_result.execution_time > 0 @pytest.mark.asyncio async def test_data_processing_capability(self, acp_controller): """Test generating and executing a data processing capability.""" capability_request = { "name": "data_processor", "description": "Process a list of numbers and return statistics", "tags": ["data", "analysis"] } # Generate code prompt = f"Generate a Python function for {capability_request['description']}. The function should process a list of numbers and return statistics including sum, average, min, and max." generated_text = await acp_controller.llm_client.generate( prompt, max_tokens=1024, temperature=0.7 ) # Extract code from response generated_code = acp_controller._extract_code_from_response(generated_text, prompt) # Validate and execute validation_issues = await acp_controller._validate_code_in_sandbox(generated_code) assert len(validation_issues) == 0 execution_result = await acp_controller.sandbox.execute_capability( generated_code, capability_name=capability_request["name"], parameters={"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} ) assert execution_result.success is True result_data = execution_result.data assert result_data["sum"] == 55 assert result_data["average"] == 5.5 assert result_data["min"] == 1 assert result_data["max"] == 10 @pytest.mark.asyncio async def test_capability_with_error_handling(self, acp_controller): """Test that capabilities handle errors gracefully.""" capability_request = { "name": "safe_divider", "description": "Safely divide two numbers with error handling", "tags": ["math", "safety"] } # Generate code prompt = f"Generate a Python function for {capability_request['description']}. 
The function should handle division by zero gracefully and return appropriate error messages." generated_text = await acp_controller.llm_client.generate( prompt, max_tokens=1024, temperature=0.7 ) # Extract code from response generated_code = acp_controller._extract_code_from_response(generated_text, prompt) # Validate validation_issues = await acp_controller._validate_code_in_sandbox(generated_code) assert len(validation_issues) == 0 # Test normal case normal_result = await acp_controller.sandbox.execute_capability( generated_code, capability_name=capability_request["name"], parameters={"numerator": 10, "denominator": 2} ) assert normal_result.success is True assert normal_result.data == 5 # Test error case (division by zero) error_result = await acp_controller.sandbox.execute_capability( generated_code, capability_name=capability_request["name"], parameters={"numerator": 10, "denominator": 0} ) # Should handle error gracefully (either return error indicator or specific message) assert error_result.success is True # Capability should handle the error internally @pytest.mark.asyncio async def test_restricted_imports_blocked(self, acp_controller): """Test that dangerous imports are properly blocked.""" malicious_code = ''' import subprocess import os result = os.system("echo 'This should not work'") ''' # Should fail validation validation_issues = await acp_controller._validate_code_in_sandbox(malicious_code) assert len(validation_issues) > 0 assert any("subprocess" in issue for issue in validation_issues) # Should fail execution execution_result = await acp_controller.sandbox.execute_capability( malicious_code, capability_name="malicious_test" ) assert execution_result.success is False assert "ImportError" in execution_result.error or "not allowed" in execution_result.error.lower() @pytest.mark.asyncio async def test_resource_limits_enforced(self, acp_controller): """Test that resource limits are properly enforced.""" # Create a sandbox with strict limits from katamari_mcp.acp.sandbox import SandboxConfig, CapabilitySandbox config = SandboxConfig() config.max_cpu_time = 2 # 2 second limit strict_sandbox = CapabilitySandbox(config) # Code that would run indefinitely infinite_loop_code = ''' import time count = 0 while True: count += 1 if count > 1000000: # Prevent actual infinite loop in test break result = count ''' # Should timeout or complete quickly result = await strict_sandbox.execute_capability( infinite_loop_code, capability_name="timeout_test" ) # Either succeeds quickly or times out if result.success: assert result.execution_time < 5.0 # Should complete quickly else: assert "timeout" in result.error.lower() or "time" in result.error.lower() @pytest.mark.asyncio async def test_full_acp_pipeline(self, acp_controller): """Test the complete ACP pipeline from request to execution.""" # Step 1: Define capability need capability_need = { "name": "text_analyzer", "description": "Analyze text and return word count and character count", "tags": ["text", "analysis"], "priority": "medium" } # Step 2: Generate capability code prompt = f"Generate a Python function for {capability_need['description']}. The function should analyze text and return word count and character count." 
generated_text = await acp_controller.llm_client.generate( prompt, max_tokens=1024, temperature=0.7 ) # Extract code from response generated_code = acp_controller._extract_code_from_response(generated_text, prompt) assert generated_code is not None assert "def " in generated_code or "result = " in generated_code # Step 3: Validate with heuristics heuristic_result = acp_controller.heuristic_engine.evaluate_capability( capability_need["name"], generated_code, capability_need["tags"] ) assert heuristic_result["approved"] is True # Step 4: Validate in sandbox validation_issues = await acp_controller._validate_code_in_sandbox(generated_code) assert len(validation_issues) == 0 # Step 5: Execute with test data test_text = "Hello world! This is a test." execution_result = await acp_controller.sandbox.execute_capability( generated_code, capability_name=capability_need["name"], parameters={"text": test_text} ) assert execution_result.success is True result_data = execution_result.data # Verify results if isinstance(result_data, dict): assert "word_count" in result_data or "words" in result_data assert "char_count" in result_data or "characters" in result_data else: # If returns a simple count or structure assert result_data is not None # Step 6: Collect feedback (simulated) await acp_controller.feedback_collector.collect_execution_feedback( capability_name=capability_need["name"], execution_result=execution_result, user_satisfaction=5.0, execution_context={"test_case": "basic_text"} ) # Verify pipeline completed successfully assert execution_result.success is True assert execution_result.execution_time > 0 if __name__ == "__main__": pytest.main([__file__, "-v"])
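For context, the assertions above imply a simple execution contract: the sandbox runs the generated snippet with the supplied parameters available as variables and reads the outcome back from a result binding (the infinite_loop_code sample in test_resource_limits_enforced follows the same convention). The exact contract is defined in katamari_mcp.acp.sandbox, so the following is only a minimal sketch of what an acceptable simple_calculator capability could look like under that assumption, not the project's actual generated output:

# Hypothetical capability body for the simple_calculator test case.
# Assumption: the sandbox injects the `parameters` dict entries as local
# variables (here `a` and `b`) and returns whatever the snippet assigns
# to `result`.
def simple_calculator(a, b):
    """Add two numbers and return the sum."""
    return a + b

result = simple_calculator(a, b)  # with parameters {"a": 5, "b": 3} this yields 8

A snippet shaped like this would satisfy both checks the pipeline applies: it contains a "def " (or a "result = " assignment), and executing it with parameters {"a": 5, "b": 3} produces execution_result.data == 8.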
