"""
Integration tests for ACP Controller with Sandbox execution.
Tests the full pipeline from LLM generation to safe execution.
"""
import pytest
import asyncio
import sys
from pathlib import Path
# Add the project root to the path
sys.path.insert(0, str(Path(__file__).parent.parent))
from katamari_mcp.acp.controller import ACPController
class TestACPSandboxIntegration:
    """Test ACP Controller integration with sandbox."""

    @pytest.fixture
    def acp_controller(self):
        """Create ACP controller instance."""
        return ACPController()
    @pytest.mark.asyncio
    async def test_simple_capability_generation_and_execution(self, acp_controller):
        """Test generating and executing a simple capability."""
        # Define a simple capability request
        capability_request = {
            "name": "simple_calculator",
            "description": "A simple calculator that adds two numbers",
            "tags": ["math", "utility"],
            "proposed_code": None  # Will be generated by LLM
        }

        # Generate code using LLM
        prompt = f"Generate a Python function for {capability_request['description']}. The function should be named '{capability_request['name']}' and handle {capability_request['description'].lower()}."
        generated_text = await acp_controller.llm_client.generate(
            prompt,
            max_tokens=1024,
            temperature=0.7
        )

        # Extract code from response
        generated_code = acp_controller._extract_code_from_response(generated_text, prompt)
        assert generated_code is not None
        assert len(generated_code) > 0

        # Validate the generated code
        validation_issues = await acp_controller._validate_code_in_sandbox(generated_code)
        assert len(validation_issues) == 0, f"Validation failed: {validation_issues}"
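        # For orientation, the generated code is expected to contain something shaped
        # roughly like the sketch below (illustrative only; the real LLM output will
        # vary, and how CapabilitySandbox maps it to execution_result.data is an
        # implementation detail):
        #
        #     def simple_calculator(a, b):
        #         return a + b
        #
        #     result = simple_calculator(a, b)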
        # Execute the capability
        execution_result = await acp_controller.sandbox.execute_capability(
            generated_code,
            capability_name=capability_request["name"],
            parameters={"a": 5, "b": 3}
        )

        assert execution_result.success is True
        assert execution_result.data == 8  # 5 + 3 = 8
        assert execution_result.execution_time > 0
    @pytest.mark.asyncio
    async def test_data_processing_capability(self, acp_controller):
        """Test generating and executing a data processing capability."""
        capability_request = {
            "name": "data_processor",
            "description": "Process a list of numbers and return statistics",
            "tags": ["data", "analysis"]
        }

        # Generate code
        prompt = f"Generate a Python function for {capability_request['description']}. The function should process a list of numbers and return statistics including sum, average, min, and max."
        generated_text = await acp_controller.llm_client.generate(
            prompt,
            max_tokens=1024,
            temperature=0.7
        )

        # Extract code from response
        generated_code = acp_controller._extract_code_from_response(generated_text, prompt)

        # Validate and execute
        validation_issues = await acp_controller._validate_code_in_sandbox(generated_code)
        assert len(validation_issues) == 0

        execution_result = await acp_controller.sandbox.execute_capability(
            generated_code,
            capability_name=capability_request["name"],
            parameters={"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
        )

        assert execution_result.success is True
        result_data = execution_result.data
        assert result_data["sum"] == 55
        assert result_data["average"] == 5.5
        assert result_data["min"] == 1
        assert result_data["max"] == 10
    @pytest.mark.asyncio
    async def test_capability_with_error_handling(self, acp_controller):
        """Test that capabilities handle errors gracefully."""
        capability_request = {
            "name": "safe_divider",
            "description": "Safely divide two numbers with error handling",
            "tags": ["math", "safety"]
        }

        # Generate code
        prompt = f"Generate a Python function for {capability_request['description']}. The function should handle division by zero gracefully and return appropriate error messages."
        generated_text = await acp_controller.llm_client.generate(
            prompt,
            max_tokens=1024,
            temperature=0.7
        )

        # Extract code from response
        generated_code = acp_controller._extract_code_from_response(generated_text, prompt)

        # Validate
        validation_issues = await acp_controller._validate_code_in_sandbox(generated_code)
        assert len(validation_issues) == 0

        # Test normal case
        normal_result = await acp_controller.sandbox.execute_capability(
            generated_code,
            capability_name=capability_request["name"],
            parameters={"numerator": 10, "denominator": 2}
        )
        assert normal_result.success is True
        assert normal_result.data == 5

        # Test error case (division by zero)
        error_result = await acp_controller.sandbox.execute_capability(
            generated_code,
            capability_name=capability_request["name"],
            parameters={"numerator": 10, "denominator": 0}
        )
        # Should handle the error gracefully (either return an error indicator or a specific message)
        assert error_result.success is True  # Capability should handle the error internally
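        # Note: the exact error payload depends on the generated code; it might, for
        # example, be a string such as "Error: division by zero" or a dict with an
        # "error" key, so this test only checks that execution itself did not fail.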
    @pytest.mark.asyncio
    async def test_restricted_imports_blocked(self, acp_controller):
        """Test that dangerous imports are properly blocked."""
        malicious_code = '''
import subprocess
import os
result = os.system("echo 'This should not work'")
'''

        # Should fail validation
        validation_issues = await acp_controller._validate_code_in_sandbox(malicious_code)
        assert len(validation_issues) > 0
        assert any("subprocess" in issue for issue in validation_issues)

        # Should fail execution
        execution_result = await acp_controller.sandbox.execute_capability(
            malicious_code,
            capability_name="malicious_test"
        )
        assert execution_result.success is False
        assert "ImportError" in execution_result.error or "not allowed" in execution_result.error.lower()
    @pytest.mark.asyncio
    async def test_resource_limits_enforced(self, acp_controller):
        """Test that resource limits are properly enforced."""
        # Create a sandbox with strict limits
        from katamari_mcp.acp.sandbox import SandboxConfig, CapabilitySandbox

        config = SandboxConfig()
        config.max_cpu_time = 2  # 2 second limit
        strict_sandbox = CapabilitySandbox(config)

        # Code that loops for a long time (bounded so the test itself cannot hang)
        infinite_loop_code = '''
import time
count = 0
while True:
    count += 1
    if count > 1000000:  # Prevent actual infinite loop in test
        break
result = count
'''

        # Should time out or complete quickly
        result = await strict_sandbox.execute_capability(
            infinite_loop_code,
            capability_name="timeout_test"
        )

        # Either succeeds quickly or times out
        if result.success:
            assert result.execution_time < 5.0  # Should complete quickly
        else:
            assert "timeout" in result.error.lower() or "time" in result.error.lower()
    @pytest.mark.asyncio
    async def test_full_acp_pipeline(self, acp_controller):
        """Test the complete ACP pipeline from request to execution."""
        # Step 1: Define capability need
        capability_need = {
            "name": "text_analyzer",
            "description": "Analyze text and return word count and character count",
            "tags": ["text", "analysis"],
            "priority": "medium"
        }

        # Step 2: Generate capability code
        prompt = f"Generate a Python function for {capability_need['description']}. The function should analyze text and return word count and character count."
        generated_text = await acp_controller.llm_client.generate(
            prompt,
            max_tokens=1024,
            temperature=0.7
        )

        # Extract code from response
        generated_code = acp_controller._extract_code_from_response(generated_text, prompt)
        assert generated_code is not None
        assert "def " in generated_code or "result = " in generated_code

        # Step 3: Validate with heuristics
        heuristic_result = acp_controller.heuristic_engine.evaluate_capability(
            capability_need["name"],
            generated_code,
            capability_need["tags"]
        )
        assert heuristic_result["approved"] is True

        # Step 4: Validate in sandbox
        validation_issues = await acp_controller._validate_code_in_sandbox(generated_code)
        assert len(validation_issues) == 0

        # Step 5: Execute with test data
        test_text = "Hello world! This is a test."
        execution_result = await acp_controller.sandbox.execute_capability(
            generated_code,
            capability_name=capability_need["name"],
            parameters={"text": test_text}
        )
        assert execution_result.success is True
        result_data = execution_result.data

        # Verify results
        if isinstance(result_data, dict):
            assert "word_count" in result_data or "words" in result_data
            assert "char_count" in result_data or "characters" in result_data
        else:
            # If the capability returns a simple count or other structure
            assert result_data is not None

        # Step 6: Collect feedback (simulated)
        await acp_controller.feedback_collector.collect_execution_feedback(
            capability_name=capability_need["name"],
            execution_result=execution_result,
            user_satisfaction=5.0,
            execution_context={"test_case": "basic_text"}
        )

        # Verify pipeline completed successfully
        assert execution_result.success is True
        assert execution_result.execution_time > 0
if __name__ == "__main__":
pytest.main([__file__, "-v"])