"""
Simple integration tests for sandbox execution without LLM dependency.
"""
import pytest
import asyncio
import sys
from pathlib import Path

# Add the project root to the path
sys.path.insert(0, str(Path(__file__).parent.parent))

from katamari_mcp.acp.controller import ACPController


class TestSandboxIntegrationSimple:
    """Test sandbox integration with predefined code."""

    @pytest.fixture
    def acp_controller(self):
        """Create ACP controller instance."""
        return ACPController()

    @pytest.mark.asyncio
    async def test_predefined_capability_execution(self, acp_controller):
        """Test executing a predefined capability in the sandbox."""
        # Predefined capability code
        calculator_code = '''
def simple_calculator(a, b):
    """Simple calculator that adds two numbers."""
    return a + b

# Execute the function
result = simple_calculator(parameters.get("a", 0), parameters.get("b", 0))
'''
        # Validate the code
        validation_issues = await acp_controller._validate_code_in_sandbox(calculator_code)
        assert len(validation_issues) == 0, f"Validation failed: {validation_issues}"

        # Execute the capability
        execution_result = await acp_controller.sandbox.execute_capability(
            calculator_code,
            capability_name="simple_calculator",
            parameters={"a": 15, "b": 7}
        )

        assert execution_result.success is True
        assert execution_result.data == 22  # 15 + 7 = 22
        assert execution_result.execution_time > 0
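
        # Hedged extra check (added sketch, not in the original test): with an
        # empty parameters dict the embedded defaults in parameters.get(...) should
        # yield 0 + 0. Assumes execute_capability accepts an empty dict the same
        # way it accepts the populated one above.
        default_result = await acp_controller.sandbox.execute_capability(
            calculator_code,
            capability_name="simple_calculator",
            parameters={}
        )
        assert default_result.success is True
        assert default_result.data == 0  # defaults: 0 + 0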

    @pytest.mark.asyncio
    async def test_data_processing_capability(self, acp_controller):
        """Test a data processing capability."""
        data_processor_code = '''
import json

def analyze_data(data):
    """Analyze a list of numbers and return statistics."""
    if not isinstance(data, list):
        raise ValueError("Data must be a list")
    if len(data) == 0:
        return {"count": 0, "sum": 0, "average": 0, "min": 0, "max": 0}
    numeric_data = [x for x in data if isinstance(x, (int, float))]
    if not numeric_data:
        return {"count": len(data), "sum": 0, "average": 0, "min": 0, "max": 0}
    return {
        "count": len(numeric_data),
        "sum": sum(numeric_data),
        "average": sum(numeric_data) / len(numeric_data),
        "min": min(numeric_data),
        "max": max(numeric_data)
    }

# Execute the analysis
result = analyze_data(parameters.get("data", []))
'''
        # Validate and execute
        validation_issues = await acp_controller._validate_code_in_sandbox(data_processor_code)
        assert len(validation_issues) == 0

        execution_result = await acp_controller.sandbox.execute_capability(
            data_processor_code,
            capability_name="data_analyzer",
            parameters={"data": [5, 10, 15, 20, 25]}
        )

        assert execution_result.success is True
        result_data = execution_result.data
        assert result_data["count"] == 5
        assert result_data["sum"] == 75
        assert result_data["average"] == 15.0
        assert result_data["min"] == 5
        assert result_data["max"] == 25

    @pytest.mark.asyncio
    async def test_error_handling_capability(self, acp_controller):
        """Test a capability with error handling."""
        safe_divider_code = '''
def safe_divide(numerator, denominator):
    """Safely divide two numbers with error handling."""
    try:
        if denominator == 0:
            return {"success": False, "error": "Division by zero", "result": None}
        result = numerator / denominator
        return {"success": True, "error": None, "result": result}
    except Exception as e:
        return {"success": False, "error": str(e), "result": None}

# Execute the function
result = safe_divide(
    parameters.get("numerator", 0),
    parameters.get("denominator", 1)
)
'''
        # Validate
        validation_issues = await acp_controller._validate_code_in_sandbox(safe_divider_code)
        assert len(validation_issues) == 0

        # Test normal case
        normal_result = await acp_controller.sandbox.execute_capability(
            safe_divider_code,
            capability_name="safe_divider",
            parameters={"numerator": 20, "denominator": 4}
        )
        assert normal_result.success is True
        assert normal_result.data["success"] is True
        assert normal_result.data["result"] == 5.0

        # Test error case
        error_result = await acp_controller.sandbox.execute_capability(
            safe_divider_code,
            capability_name="safe_divider",
            parameters={"numerator": 20, "denominator": 0}
        )
        assert error_result.success is True  # Capability itself handled the error
        assert error_result.data["success"] is False
        assert "Division by zero" in error_result.data["error"]

    @pytest.mark.asyncio
    async def test_restricted_imports_blocked(self, acp_controller):
        """Test that dangerous imports are properly blocked."""
        malicious_code = '''
import subprocess
result = "should not reach here"
'''
        # Should fail validation
        validation_issues = await acp_controller._validate_code_in_sandbox(malicious_code)
        assert len(validation_issues) > 0
        assert any("subprocess" in issue for issue in validation_issues)

        # Should fail execution
        execution_result = await acp_controller.sandbox.execute_capability(
            malicious_code,
            capability_name="malicious_test"
        )
        assert execution_result.success is False
        assert "ImportError" in execution_result.error or "not allowed" in execution_result.error.lower()

    @pytest.mark.asyncio
    async def test_heuristic_evaluation(self, acp_controller):
        """Test heuristic evaluation of capabilities."""
        safe_capability = '''
def text_processor(text):
    """Process text and return basic statistics."""
    if not isinstance(text, str):
        raise ValueError("Input must be a string")
    words = text.split()
    return {
        "word_count": len(words),
        "char_count": len(text),
        "char_count_no_spaces": len(text.replace(" ", ""))
    }

result = text_processor(parameters.get("text", ""))
'''
        # Evaluate with heuristics
        heuristic_result = acp_controller.heuristic_engine.evaluate_capability(
            "text_processor",
            safe_capability,
            ["text", "utility", "safe"]
        )
        assert heuristic_result["approved"] is True
        assert heuristic_result["risk_score"] < 0.5  # Should be low risk
        assert len(heuristic_result["applied_tags"]) > 0

        # Validate in sandbox
        validation_issues = await acp_controller._validate_code_in_sandbox(safe_capability)
        assert len(validation_issues) == 0

        # Execute
        execution_result = await acp_controller.sandbox.execute_capability(
            safe_capability,
            capability_name="text_processor",
            parameters={"text": "Hello world! This is a test."}
        )
        assert execution_result.success is True
        result_data = execution_result.data
        assert result_data["word_count"] == 6
        assert result_data["char_count"] == 28  # "Hello world! This is a test." is 28 characters
        assert result_data["char_count_no_spaces"] == 23  # 28 characters minus 5 spaces

    @pytest.mark.asyncio
    async def test_full_pipeline_without_llm(self, acp_controller):
        """Test the complete pipeline except LLM generation."""
        # Step 1: Define capability need (simulated)
        capability_need = {
            "name": "number_validator",
            "description": "Validate if a number is within a specified range",
            "tags": ["validation", "utility"],
            "priority": "medium"
        }

        # Step 2: Use predefined code instead of LLM generation
        predefined_code = '''
def validate_range(number, min_val, max_val):
    """Validate if a number is within the specified range."""
    try:
        num = float(number)
        if min_val <= num <= max_val:
            return {
                "valid": True,
                "number": num,
                "range": [min_val, max_val],
                "message": f"{num} is within range [{min_val}, {max_val}]"
            }
        else:
            return {
                "valid": False,
                "number": num,
                "range": [min_val, max_val],
                "message": f"{num} is outside range [{min_val}, {max_val}]"
            }
    except (ValueError, TypeError):
        return {
            "valid": False,
            "error": f"Invalid number: {number}",
            "range": [min_val, max_val]
        }

# Execute validation
result = validate_range(
    parameters.get("number", 0),
    parameters.get("min_val", 0),
    parameters.get("max_val", 100)
)
'''
        # Step 3: Validate with heuristics
        heuristic_result = acp_controller.heuristic_engine.evaluate_capability(
            capability_need["name"],
            predefined_code,
            capability_need["tags"]
        )
        assert heuristic_result["approved"] is True
        assert heuristic_result["risk_score"] < 0.3  # Should be very low risk

        # Step 4: Validate in sandbox
        validation_issues = await acp_controller._validate_code_in_sandbox(predefined_code)
        assert len(validation_issues) == 0

        # Step 5: Execute with test data
        test_cases = [
            {"number": 50, "min_val": 0, "max_val": 100},        # Valid
            {"number": 150, "min_val": 0, "max_val": 100},       # Invalid (too high)
            {"number": -10, "min_val": 0, "max_val": 100},       # Invalid (too low)
            {"number": "invalid", "min_val": 0, "max_val": 100}  # Invalid (not a number)
        ]

        for i, test_params in enumerate(test_cases):
            execution_result = await acp_controller.sandbox.execute_capability(
                predefined_code,
                capability_name=capability_need["name"],
                parameters=test_params
            )
            assert execution_result.success is True
            result_data = execution_result.data
            if i == 0:  # Valid case
                assert result_data["valid"] is True
            else:  # Invalid cases
                assert result_data["valid"] is False

        # Step 6: Collect feedback (simulated)
        await acp_controller.feedback_collector.collect_execution_feedback(
            capability_name=capability_need["name"],
            execution_result=execution_result,
            user_satisfaction=4.5,
            execution_context={"test_cases": len(test_cases)}
        )

        # Verify pipeline completed successfully
        assert execution_result.success is True
        assert execution_result.execution_time > 0
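
        # Hedged boundary check (added sketch): validate_range uses inclusive
        # comparisons, so a value exactly at max_val should be reported as valid.
        boundary_result = await acp_controller.sandbox.execute_capability(
            predefined_code,
            capability_name=capability_need["name"],
            parameters={"number": 100, "min_val": 0, "max_val": 100}
        )
        assert boundary_result.success is True
        assert boundary_result.data["valid"] is True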


if __name__ == "__main__":
    pytest.main([__file__, "-v"])