"""End-to-end tests for experiment functionality.
Tests for TASK-03-03 requirements:
- TR-05: File output format verification tests
- TR-06: CLI function end-to-end tests
Implements: TASK-03-03 - End-to-end testing requirements
"""
import asyncio
import json
import tempfile
from pathlib import Path
from subprocess import CompletedProcess, run
import pytest
pytestmark = pytest.mark.dev
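# Module-level pytestmark applies the "dev" marker to every test collected from this file.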
@pytest.mark.dev
class TestExperimentE2E:
"""End-to-end tests for experiment functionality."""
@pytest.fixture
def sample_markdown_file(self):
"""Create sample markdown file for testing."""
with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
f.write("""# Test Requirements Document
## REQ-01: システムは、ユーザーが要求する機能を提供すること
システムは基本的な機能を提供する必要がある。
## REQ-02: システムは、99%以上の可用性を維持すること
システムは高い可用性を保つ必要がある。
## REQ-03: システムは、1秒以内にレスポンスを返すこと
システムは迅速に応答する必要がある。
## REQ-04: システムは、エラーハンドリングを適切に行うこと
システムは異常状況を適切に処理する必要がある。
## REQ-05: システムは、セキュリティ要件を満たすこと
システムはセキュリティ基準に準拠する必要がある。
""")
        path = Path(f.name)
        yield path
        # Teardown: remove the file even if a test already unlinked it.
        path.unlink(missing_ok=True)
@pytest.fixture
def performance_function_code(self):
"""Create sample performance function code."""
return """
def test_performance_function():
total = 0
for i in range(10000):
total += i * i
return total
"""
def run_cli_command(self, command: list[str], check: bool = True) -> CompletedProcess[str]:
"""Run CLI command and return result."""
full_command = ["uv", "run", "--active", "wassden", *command]
return run(full_command, capture_output=True, text=True, check=check, cwd=Path.cwd())
async def run_cli_command_async(self, command: list[str]) -> tuple[str, int, str]:
"""Run CLI command asynchronously and return (command, returncode, stdout)."""
full_command = ["uv", "run", "--active", "wassden", *command]
proc = await asyncio.create_subprocess_exec(
*full_command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=Path.cwd()
)
stdout, _ = await proc.communicate()
        return (command[0] if command else "unknown", proc.returncode or 0, stdout.decode())
@pytest.mark.asyncio
async def test_cli_help_commands(self):
"""Test TR-06: CLI help commands work end-to-end (async parallel version)."""
# Test main help first
result = self.run_cli_command(["--help"])
assert result.returncode == 0
assert "wassden" in result.stdout
assert "Usage:" in result.stdout
# Test subcommand help in parallel
subcommands = [
"prompt-requirements",
"validate-requirements",
"prompt-requirements",
"validate-design",
"prompt-design",
"validate-tasks",
"prompt-tasks",
"prompt-code",
"get-traceability",
"analyze-changes",
"generate-review-prompt",
"start-mcp-server",
]
# Run all subcommand help tests concurrently
help_commands = [[cmd, "--help"] for cmd in subcommands]
results = await asyncio.gather(
*[self.run_cli_command_async(cmd) for cmd in help_commands], return_exceptions=True
)
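        # return_exceptions=True converts per-command failures into returned
        # exception objects instead of cancelling the remaining tasks.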
# Verify results
successful_commands = 0
for result in results:
if isinstance(result, Exception):
                # Skip failed commands without failing the test - some subcommands may not exist
continue
cmd, returncode, stdout = result
if returncode == 0:
successful_commands += 1
assert "Usage:" in stdout or "help" in stdout.lower(), f"Command {cmd} help should contain usage info"
# At least half of the commands should work
assert successful_commands >= len(subcommands) // 2, (
f"Expected at least {len(subcommands) // 2} commands to work, got {successful_commands}"
)
def test_cli_basic_functionality(self):
"""Test TR-06: Basic CLI functionality end-to-end."""
        # Test prompt generation with minimal arguments
result = self.run_cli_command(["prompt-requirements", "--userInput", "Test project"])
assert result.returncode == 0
# Output should contain some guidance or response
assert len(result.stdout) > 0
        # Test prompt generation with --force; output should name the requirements file
result = self.run_cli_command(
[
"prompt-requirements",
"--userInput",
"Test project",
"--force",
]
)
assert result.returncode == 0
assert "requirements.md" in result.stdout
def test_file_output_format_verification_json(self, sample_markdown_file):
"""Test TR-05: JSON file output format verification."""
# Create temporary output file
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
output_file = Path(f.name)
try:
# Test EARS coverage measurement with JSON output (if command exists)
result = self.run_cli_command(
["validate-requirements", "--requirementsPath", str(sample_markdown_file)], check=False
)
# Even if command fails or doesn't exist, we can test JSON format validation
if result.returncode == 0:
# If command succeeded, check if output is valid JSON-like
output_text = result.stdout
# Look for JSON-like patterns
assert "{" in output_text or "[" in output_text or '"' in output_text
# Test JSON format validation with sample data
sample_experiment_result = {
"experiment_id": "test-001",
"experiment_type": "ears_coverage",
"timestamp": "2024-01-01T00:00:00Z",
"status": "completed",
"duration_seconds": 1.5,
"ears_report": {"total_requirements": 5, "ears_compliant": 4, "coverage_rate": 0.8, "violations": []},
"config": {"experiment_type": "ears_coverage", "description": "Test experiment", "iterations": 1},
}
# Write and validate JSON format
with output_file.open("w") as f:
json.dump(sample_experiment_result, f, indent=2)
# Verify JSON can be loaded
with output_file.open() as f:
loaded_data = json.load(f)
# Verify structure
assert loaded_data["experiment_id"] == "test-001"
assert loaded_data["ears_report"]["total_requirements"] == 5
assert loaded_data["ears_report"]["coverage_rate"] == 0.8
assert "config" in loaded_data
assert "timestamp" in loaded_data
finally:
# Cleanup
sample_markdown_file.unlink()
if output_file.exists():
output_file.unlink()
def test_file_output_format_verification_csv(self):
"""Test TR-05: CSV file output format verification."""
# Create temporary CSV output file
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
output_file = Path(f.name)
try:
# Test CSV format with sample experiment data
csv_header = (
"experiment_id,experiment_type,timestamp,status,"
"duration_seconds,total_requirements,ears_compliant,coverage_rate"
)
csv_data = "test-001,ears_coverage,2024-01-01T00:00:00Z,completed,1.5,5,4,0.8"
# Write CSV format
with output_file.open("w") as f:
f.write(csv_header + "\n")
f.write(csv_data + "\n")
# Verify CSV can be read and parsed
with output_file.open() as f:
lines = f.readlines()
assert len(lines) == 2 # Header + data
header_columns = lines[0].strip().split(",")
data_columns = lines[1].strip().split(",")
# Verify header structure
assert "experiment_id" in header_columns
assert "experiment_type" in header_columns
assert "coverage_rate" in header_columns
# Verify data structure
assert len(header_columns) == len(data_columns)
assert data_columns[0] == "test-001" # experiment_id
assert data_columns[1] == "ears_coverage" # experiment_type
assert data_columns[7] == "0.8" # coverage_rate
finally:
# Cleanup
if output_file.exists():
output_file.unlink()
def test_cli_error_handling(self):
"""Test TR-06: CLI error handling end-to-end."""
# Test with invalid file path
result = self.run_cli_command(
["validate-requirements", "--requirementsPath", "/non/existent/file.md"], check=False
)
# Should handle error gracefully (non-zero exit code or error message)
assert (
result.returncode != 0
or "not found" in result.stderr.lower()
or "not found" in result.stdout.lower()
or "見つかりません" in result.stdout
)
# Test with invalid parameters
result = self.run_cli_command(
[
"prompt-requirements"
# Missing required --userInput parameter
],
check=False,
)
# Should show help or error message
assert result.returncode != 0 or "Usage:" in result.stderr or "required" in result.stderr.lower()
def test_output_format_consistency(self, sample_markdown_file):
"""Test TR-05: Output format consistency across different experiment types."""
# Test requirements validation output format
result = self.run_cli_command(
["validate-requirements", "--requirementsPath", str(sample_markdown_file)], check=False
)
if result.returncode == 0:
output = result.stdout
# Verify output contains expected elements
assert len(output) > 0
# Should contain status information
assert (
"✅" in output
or "⚠️" in output
or "❌" in output
or "passed" in output.lower()
or "failed" in output.lower()
or "success" in output.lower()
or "error" in output.lower()
)
        # No explicit cleanup needed: the fixture teardown removes the sample file.
def test_mcp_server_startup(self):
"""Test TR-06: MCP server startup end-to-end."""
# Test MCP server help (should not actually start server)
result = self.run_cli_command(["start-mcp-server", "--help"], check=False)
if result.returncode == 0:
assert "Usage:" in result.stderr or "transport" in result.stdout.lower()
# If command doesn't exist, that's also acceptable for basic CLI test
def test_traceability_command(self):
"""Test TR-06: Traceability command end-to-end."""
# Test traceability report generation
result = self.run_cli_command(["get-traceability"], check=False)
# Should either succeed or fail gracefully with meaningful message
if result.returncode == 0:
assert len(result.stdout) > 0
else:
# Should provide meaningful error message
assert len(result.stderr) > 0 or "not found" in result.stdout.lower()
def test_change_analysis_command(self):
"""Test TR-06: Change analysis command end-to-end."""
# Test change analysis
result = self.run_cli_command(
[
"analyze-changes",
"--requirementsPath",
"specs/requirements.md",
"--changeDescription",
"Added new feature requirement",
],
check=False,
)
# Should handle gracefully even if files don't exist
if result.returncode == 0:
assert len(result.stdout) > 0
else:
# Should provide meaningful error for missing files
assert "not found" in result.stderr.lower() or "file" in result.stderr.lower() or len(result.stderr) > 0
def test_comprehensive_workflow_integration(self, sample_markdown_file):
"""Test TR-05, TR-06: Comprehensive workflow integration."""
try:
# Step 1: Validate requirements file
result1 = self.run_cli_command(
["validate-requirements", "--requirementsPath", str(sample_markdown_file)], check=False
)
# Should complete (successfully or with meaningful error)
assert result1.returncode == 0 or len(result1.stderr) > 0
# Step 2: Generate traceability if step 1 succeeded
if result1.returncode == 0:
result2 = self.run_cli_command(["get-traceability"], check=False)
# Should either work or fail with clear message
assert result2.returncode == 0 or len(result2.stderr) > 0
# Step 3: Test completeness check
result3 = self.run_cli_command(["prompt-requirements", "--userInput", "Integration test project"])
assert result3.returncode == 0
assert len(result3.stdout) > 0
finally:
sample_markdown_file.unlink()
@pytest.mark.dev
class TestExperimentFileOutputFormats:
"""Specific tests for experiment file output formats (TR-05)."""
def test_json_experiment_result_format(self):
"""Test JSON format for experiment results."""
# Test comprehensive experiment result JSON structure
experiment_result = {
"experiment_id": "exp-20240101-001",
"experiment_type": "performance",
"timestamp": "2024-01-01T12:00:00Z",
"status": "completed",
"duration_seconds": 15.5,
"config": {
"experiment_type": "performance",
"description": "Performance measurement test",
"iterations": 10,
"timeout_seconds": 60,
},
"performance_report": {
"total_executions": 10,
"successful_executions": 10,
"average_wall_time_ms": 125.5,
"average_cpu_time_ms": 120.2,
"average_memory_mb": 25.8,
"details": [
{
"wall_time_ms": 125.5,
"cpu_time_ms": 120.2,
"memory_used_mb": 25.8,
"success": True,
"error_message": None,
}
],
},
}
# Verify JSON serialization/deserialization
json_str = json.dumps(experiment_result, indent=2)
loaded_result = json.loads(json_str)
# Verify structure preservation
assert loaded_result["experiment_id"] == "exp-20240101-001"
assert loaded_result["performance_report"]["total_executions"] == 10
assert loaded_result["performance_report"]["average_wall_time_ms"] == 125.5
assert loaded_result["config"]["iterations"] == 10
def test_csv_experiment_result_format(self):
"""Test CSV format for experiment results."""
# Create sample CSV data for experiment results
csv_lines = [
"experiment_id,experiment_type,timestamp,status,duration_seconds,total_executions,average_wall_time_ms,average_memory_mb",
"exp-001,performance,2024-01-01T12:00:00Z,completed,15.5,10,125.5,25.8",
"exp-002,ears_coverage,2024-01-01T12:05:00Z,completed,2.1,1,2100.0,15.2",
"exp-003,language_detection,2024-01-01T12:10:00Z,completed,5.8,1,5800.0,20.1",
]
# Verify CSV structure
header = csv_lines[0].split(",")
assert "experiment_id" in header
assert "experiment_type" in header
assert "status" in header
assert "duration_seconds" in header
# Verify data consistency
for line in csv_lines[1:]:
values = line.split(",")
assert len(values) == len(header) # Same number of columns
assert values[0].startswith("exp-") # Valid experiment ID format
assert values[3] in ["completed", "failed", "running"] # Valid status
def test_format_validation_edge_cases(self):
"""Test format validation with edge cases."""
# Test JSON with special characters
special_data = {
"experiment_id": "test-special-chars",
"description": "Test with \"quotes\" and 'apostrophes' and \n newlines",
"metrics": {
"unicode_test": "テスト データ",
"null_value": None,
"empty_string": "",
"number_as_string": "123.456",
},
}
# Should handle special characters gracefully
json_str = json.dumps(special_data, ensure_ascii=False, indent=2)
loaded_data = json.loads(json_str)
assert loaded_data["metrics"]["unicode_test"] == "テスト データ"
assert loaded_data["metrics"]["null_value"] is None
# Test CSV with special characters (should be escaped)
csv_line = 'exp-001,"Test with ""quotes"" and, commas",completed,1.5'
# Should handle quoted values properly
assert '"Test with ""quotes"" and, commas"' in csv_line
def test_large_output_format_handling(self):
"""Test handling of large experiment result outputs."""
# Create large experiment result
large_result = {
"experiment_id": "large-test-001",
"experiment_type": "stress_test",
"details": [
{"iteration": i, "wall_time_ms": 100 + i * 0.1, "memory_mb": 20 + i * 0.01, "success": True}
for i in range(1000) # Large dataset
],
}
# Should handle large JSON serialization
json_str = json.dumps(large_result)
assert len(json_str) > 10000 # Verify it's actually large
# Should be able to load back
loaded_result = json.loads(json_str)
assert len(loaded_result["details"]) == 1000
assert loaded_result["details"][999]["iteration"] == 999