"""Unit tests for experiment manager."""
import asyncio
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
from wassden.lib.experiment import (
ExperimentConfig,
ExperimentStatus,
ExperimentType,
OutputFormat,
)
from wassden.lib.experiment_manager import (
ConfigurationError,
ExecutionError,
ExperimentManager,
ExperimentManagerError,
)
pytestmark = pytest.mark.dev
@pytest.mark.dev
class TestExperimentManagerExecution:
"""Test experiment execution management functionality."""
def test_create_experiment_result(self):
"""Test creation of experiment result objects."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=150,
output_format=[OutputFormat.JSON],
)
result = manager.create_experiment_result(config)
# Verify result properties
assert result.experiment_id is not None
assert len(result.experiment_id) > 10 # UUID-like length
assert result.config == config
assert result.status == ExperimentStatus.PENDING
assert result.duration_seconds == 0.0
assert result.metadata == {}
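    # Follow-on sketch (ID uniqueness is also asserted in the concurrent execution
    # test later in this module): two results created from the same config should
    # receive distinct experiment IDs.
    def test_create_experiment_result_ids_unique(self):
        """Each created result gets its own experiment ID."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = ExperimentManager(config_dir=Path(temp_dir))
            config = ExperimentConfig(
                experiment_type=ExperimentType.PERFORMANCE,
                timeout_seconds=300,
                memory_limit_mb=150,
                output_format=[OutputFormat.JSON],
            )
            first = manager.create_experiment_result(config)
            second = manager.create_experiment_result(config)
            assert first.experiment_id != second.experiment_id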
def test_update_experiment_status(self):
"""Test experiment status updates."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.EARS_COVERAGE,
timeout_seconds=120,
memory_limit_mb=75,
output_format=[OutputFormat.YAML],
)
result = manager.create_experiment_result(config)
experiment_id = result.experiment_id
# Test status progression
manager.update_experiment_status(experiment_id, ExperimentStatus.RUNNING)
updated_result = manager.get_experiment_result(experiment_id)
assert updated_result.status == ExperimentStatus.RUNNING
# Test completion with metadata
test_metadata = {"results": {"accuracy": 0.95}, "time_taken": 2.5}
manager.update_experiment_status(experiment_id, ExperimentStatus.COMPLETED, metadata=test_metadata)
final_result = manager.get_experiment_result(experiment_id)
assert final_result.status == ExperimentStatus.COMPLETED
assert final_result.metadata == test_metadata
assert final_result.duration_seconds > 0 # Should be automatically calculated
def test_update_experiment_status_with_error(self):
"""Test experiment status update with error message."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.COMPARATIVE,
timeout_seconds=180,
memory_limit_mb=200,
output_format=[OutputFormat.JSON],
)
result = manager.create_experiment_result(config)
experiment_id = result.experiment_id
# Test failure with error message
error_message = "Input file not found"
manager.update_experiment_status(experiment_id, ExperimentStatus.FAILED, error_message=error_message)
failed_result = manager.get_experiment_result(experiment_id)
assert failed_result.status == ExperimentStatus.FAILED
assert failed_result.error_message == error_message
"""Test experiment manager exception classes."""
def test_experiment_manager_error_inheritance(self):
"""Test ExperimentManagerError is base exception."""
error = ExperimentManagerError("test message")
assert str(error) == "test message"
assert isinstance(error, Exception)
def test_configuration_error_inheritance(self):
"""Test ConfigurationError inherits from ExperimentManagerError."""
error = ConfigurationError("config error")
assert str(error) == "config error"
assert isinstance(error, ExperimentManagerError)
def test_execution_error_inheritance(self):
"""Test ExecutionError inherits from ExperimentManagerError."""
error = ExecutionError("execution error")
assert str(error) == "execution error"
assert isinstance(error, ExperimentManagerError)
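    # Minimal follow-on sketch: because both errors subclass ExperimentManagerError
    # (asserted above), either can be caught via the base class.
    def test_errors_caught_as_base_class(self):
        """ConfigurationError and ExecutionError are catchable as the base error."""
        with pytest.raises(ExperimentManagerError):
            raise ConfigurationError("config error")
        with pytest.raises(ExperimentManagerError):
            raise ExecutionError("execution error")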
@pytest.mark.dev
class TestExperimentManagerAdvanced:
"""Test advanced ExperimentManager functionality."""
def test_list_active_experiments_and_cleanup(self):
"""Test active experiment listing and cleanup functionality."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Create multiple experiments with different statuses
configs = [
ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
),
ExperimentConfig(
experiment_type=ExperimentType.EARS_COVERAGE,
timeout_seconds=600,
memory_limit_mb=200,
output_format=[OutputFormat.CSV],
),
ExperimentConfig(
experiment_type=ExperimentType.LANGUAGE_DETECTION,
timeout_seconds=450,
memory_limit_mb=150,
output_format=[OutputFormat.YAML],
),
]
results = []
for config in configs:
result = manager.create_experiment_result(config)
results.append(result)
# Verify all experiments are initially active
active_experiments = manager.list_active_experiments()
assert len(active_experiments) == 3
# Update statuses to different states
manager.update_experiment_status(results[0].experiment_id, ExperimentStatus.COMPLETED)
manager.update_experiment_status(
results[1].experiment_id, ExperimentStatus.FAILED, error_message="Test error"
)
manager.update_experiment_status(results[2].experiment_id, ExperimentStatus.RUNNING)
# Test cleanup - should remove completed and failed
cleaned_count = manager.cleanup_completed_experiments()
assert cleaned_count == 2 # Completed and failed experiments removed
# Verify only running experiment remains
remaining_experiments = manager.list_active_experiments()
assert len(remaining_experiments) == 1
assert remaining_experiments[0].status == ExperimentStatus.RUNNING
assert remaining_experiments[0].experiment_id == results[2].experiment_id
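    # Follow-on sketch: with nothing completed or failed, cleanup should report zero
    # removals (assumes the return value counts removed entries, as asserted above).
    def test_cleanup_with_no_finished_experiments(self):
        """Cleanup is a no-op when no experiment has finished."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = ExperimentManager(config_dir=Path(temp_dir))
            assert manager.cleanup_completed_experiments() == 0
            assert len(manager.list_active_experiments()) == 0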
def test_experiment_not_found_error_handling(self):
"""Test error handling for non-existent experiments."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
non_existent_id = "non-existent-experiment-id"
# Test get_experiment_result with non-existent ID
result = manager.get_experiment_result(non_existent_id)
assert result is None
# Test update_experiment_status with non-existent ID
with pytest.raises(ExecutionError, match="Experiment .* not found"):
manager.update_experiment_status(non_existent_id, ExperimentStatus.COMPLETED)
def test_create_default_config_for_all_experiment_types(self):
"""Test default config creation for all experiment types."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Test each experiment type
experiment_types = [
ExperimentType.PERFORMANCE,
ExperimentType.EARS_COVERAGE,
ExperimentType.LANGUAGE_DETECTION,
ExperimentType.COMPARATIVE,
]
for exp_type in experiment_types:
config = manager.create_default_config(exp_type)
# Verify basic properties
assert config.experiment_type == exp_type
assert config.timeout_seconds == 600 # Default value
assert config.memory_limit_mb == 100 # Default value
assert OutputFormat.JSON in config.output_format # Default format
assert isinstance(config.parameters, dict)
# Test that config can be saved and loaded
config_name = f"test_default_{exp_type.value}"
saved_path = manager.save_config(config, config_name)
assert saved_path.exists()
loaded_config = manager.load_config(config_name)
assert loaded_config.experiment_type == exp_type
@pytest.mark.dev
class TestExperimentManagerInit:
"""Test ExperimentManager initialization."""
def test_init_default_config_dir(self, tmp_path):
"""Test initialization with default config directory."""
with patch("wassden.lib.experiment_manager.Path.cwd", return_value=tmp_path):
manager = ExperimentManager()
expected_dir = tmp_path / ".wassden" / "experiments"
assert manager.config_dir == expected_dir
# Directory is not created until needed
assert not manager.config_dir.exists()
def test_init_custom_config_dir(self):
"""Test initialization with custom config directory."""
with tempfile.TemporaryDirectory() as temp_dir:
custom_dir = Path(temp_dir) / "custom_experiments"
manager = ExperimentManager(config_dir=custom_dir)
assert manager.config_dir == custom_dir
# Directory is not created until needed
assert not manager.config_dir.exists()
def test_init_active_experiments_empty(self, tmp_path):
"""Test initialization with empty active experiments."""
with patch("wassden.lib.experiment_manager.Path.cwd", return_value=tmp_path):
manager = ExperimentManager()
assert manager._active_experiments == {}
def test_ensure_config_dir_creates_directory(self, tmp_path):
"""Test that _ensure_config_dir creates the directory when needed."""
config_dir = tmp_path / "test_config"
manager = ExperimentManager(config_dir=config_dir)
# Directory doesn't exist initially
assert not config_dir.exists()
# Call _ensure_config_dir
manager._ensure_config_dir()
# Directory should now exist
assert config_dir.exists()
assert config_dir.is_dir()
@pytest.mark.dev
class TestExperimentManagerSaveConfig:
"""Test save_config method with real file operations."""
def test_save_config_success(self):
"""Test successful config saving with actual file verification."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=150,
output_format=[OutputFormat.JSON],
parameters={"measurement_rounds": 5, "warmup": True},
)
config_path = manager.save_config(config, "test_config")
# Verify file was created at correct location
expected_path = manager.config_dir / "test_config.yaml"
assert config_path == expected_path
assert config_path.exists()
assert config_path.is_file()
# Verify content is valid YAML and matches configuration
with config_path.open() as f:
saved_data = yaml.safe_load(f)
# Verify all fields are correctly saved
assert saved_data["experiment_type"] == "performance"
assert saved_data["timeout_seconds"] == 300
assert saved_data["memory_limit_mb"] == 150
assert saved_data["output_format"] == ["json"]
assert saved_data["parameters"] == {"measurement_rounds": 5, "warmup": True}
# Verify file can be loaded back into ExperimentConfig
reloaded_config = ExperimentConfig(**saved_data)
assert reloaded_config.experiment_type == config.experiment_type
assert reloaded_config.parameters == config.parameters
def test_save_config_file_permissions(self):
"""Test that saved config files have correct permissions."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.EARS_COVERAGE,
timeout_seconds=120,
memory_limit_mb=75,
output_format=[OutputFormat.YAML],
)
config_path = manager.save_config(config, "permission_test")
# Verify file is readable and writable
assert config_path.exists()
            # Owner should have both read and write permission
            assert config_path.stat().st_mode & 0o600 == 0o600
# Verify content can be read
content = config_path.read_text()
assert "experiment_type: ears_coverage" in content
def test_save_config_overwrites_existing(self):
"""Test that save_config overwrites existing config."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Save first config
config1 = ExperimentConfig(
experiment_type=ExperimentType.EARS_COVERAGE,
timeout_seconds=100,
memory_limit_mb=50,
output_format=[OutputFormat.YAML],
)
manager.save_config(config1, "duplicate_test")
# Save second config with same name
config2 = ExperimentConfig(
experiment_type=ExperimentType.LANGUAGE_DETECTION,
timeout_seconds=200,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
)
config_path = manager.save_config(config2, "duplicate_test")
# Verify latest config was saved
with config_path.open() as f:
saved_data = yaml.safe_load(f)
assert saved_data["experiment_type"] == "language_detection"
assert saved_data["timeout_seconds"] == 200
@pytest.mark.dev
class TestExperimentManagerLoadConfig:
"""Test load_config method."""
def test_load_config_success(self):
"""Test successful config loading."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Create config file
config_data = {
"experiment_type": "comparative",
"timeout_seconds": 400,
"memory_limit_mb": 200,
"output_format": ["json", "yaml"],
}
config_path = manager.config_dir / "load_test.yaml"
with config_path.open("w") as f:
yaml.dump(config_data, f)
# Load config
loaded_config = manager.load_config("load_test")
assert loaded_config.experiment_type == ExperimentType.COMPARATIVE
assert loaded_config.timeout_seconds == 400
assert loaded_config.memory_limit_mb == 200
            # Both formats from the saved file should round-trip through load_config
            assert OutputFormat.JSON in loaded_config.output_format
            assert OutputFormat.YAML in loaded_config.output_format
def test_load_config_file_not_found(self):
"""Test loading non-existent config raises ConfigurationError."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
with pytest.raises(ConfigurationError, match="Configuration file .* not found"):
manager.load_config("nonexistent")
@pytest.mark.dev
class TestExperimentManagerCreateDefaultConfig:
"""Test create_default_config method."""
def test_create_default_config_performance(self, tmp_path):
"""Test creating default performance config."""
manager = ExperimentManager(config_dir=tmp_path)
config = manager.create_default_config(ExperimentType.PERFORMANCE)
assert config.experiment_type == ExperimentType.PERFORMANCE
assert config.timeout_seconds == 600
assert config.memory_limit_mb == 100
assert OutputFormat.JSON in config.output_format
def test_create_default_config_ears_coverage(self, tmp_path):
"""Test creating default EARS coverage config."""
manager = ExperimentManager(config_dir=tmp_path)
config = manager.create_default_config(ExperimentType.EARS_COVERAGE)
assert config.experiment_type == ExperimentType.EARS_COVERAGE
assert config.timeout_seconds == 600
assert config.memory_limit_mb == 100
assert OutputFormat.JSON in config.output_format
def test_create_default_config_language_detection(self, tmp_path):
"""Test creating default language detection config."""
manager = ExperimentManager(config_dir=tmp_path)
config = manager.create_default_config(ExperimentType.LANGUAGE_DETECTION)
assert config.experiment_type == ExperimentType.LANGUAGE_DETECTION
assert config.timeout_seconds == 600
assert config.memory_limit_mb == 100
assert OutputFormat.JSON in config.output_format
def test_create_default_config_comparative(self, tmp_path):
"""Test creating default comparative config."""
manager = ExperimentManager(config_dir=tmp_path)
config = manager.create_default_config(ExperimentType.COMPARATIVE)
assert config.experiment_type == ExperimentType.COMPARATIVE
assert config.timeout_seconds == 600
assert config.memory_limit_mb == 100
assert OutputFormat.JSON in config.output_format
@pytest.mark.dev
class TestExperimentManagerListConfigs:
"""Test list_configs method."""
def test_list_configs_empty(self):
"""Test listing configs when directory is empty."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
configs = manager.list_configs()
assert configs == []
def test_list_configs_nonexistent_directory(self):
"""Test listing configs when config directory doesn't exist."""
with tempfile.TemporaryDirectory() as temp_dir:
# Use a non-existent subdirectory
nonexistent_dir = Path(temp_dir) / "nonexistent"
manager = ExperimentManager(config_dir=nonexistent_dir)
configs = manager.list_configs()
assert configs == []
def test_list_configs_with_files(self):
"""Test listing configs with multiple files."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Create test config files
config_names = ["config1", "config2", "config3"]
for name in config_names:
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
)
manager.save_config(config, name)
configs = manager.list_configs()
assert len(configs) == 3
assert all(name in configs for name in config_names)
def test_list_configs_ignores_non_yaml(self):
"""Test that list_configs ignores non-YAML files."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Create YAML config
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
)
manager.save_config(config, "valid_config")
# Create non-YAML files
(manager.config_dir / "readme.txt").write_text("Not a config")
(manager.config_dir / "config.json").write_text("{}")
configs = manager.list_configs()
assert configs == ["valid_config"]
@pytest.mark.dev
class TestExperimentManagerBasicFunctionality:
"""Test basic experiment manager functionality that is implemented."""
def test_save_and_load_config_integration(self):
"""Test saving and loading config integration."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
# Create and save config
config = manager.create_default_config(ExperimentType.PERFORMANCE)
config.timeout_seconds = 300
config.memory_limit_mb = 150
manager.save_config(config, "integration_test")
# Load and verify
loaded_config = manager.load_config("integration_test")
assert loaded_config.experiment_type == ExperimentType.PERFORMANCE
assert loaded_config.timeout_seconds == 300
assert loaded_config.memory_limit_mb == 150
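    # Small additional sketch (list_configs behaviour is exercised in
    # TestExperimentManagerListConfigs above): a config saved through the manager
    # should also appear when listing saved configs.
    def test_saved_config_appears_in_listing(self):
        """A saved config name is returned by list_configs."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = ExperimentManager(config_dir=Path(temp_dir))
            config = manager.create_default_config(ExperimentType.EARS_COVERAGE)
            manager.save_config(config, "listed_config")
            assert "listed_config" in manager.list_configs()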
@pytest.mark.dev
class TestExperimentManagerAsyncExecution:
"""Test asynchronous experiment execution functionality."""
@pytest.mark.asyncio
async def test_run_experiment_basic_workflow(self):
"""Test basic run_experiment workflow."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
parameters={"test_documents": ["test.py"]},
)
# Execute experiment
result = await manager.run_experiment(config)
# Verify complete workflow
assert result.status == ExperimentStatus.COMPLETED
assert result.experiment_id in manager._active_experiments
assert "experiment_type" in result.metadata
assert result.metadata["experiment_type"] == "performance"
assert "parameters" in result.metadata
assert result.metadata["parameters"] == config.parameters
assert "output_formats" in result.metadata
assert result.metadata["output_formats"] == ["json"]
@pytest.mark.asyncio
async def test_run_experiment_all_types(self):
"""Test run_experiment with all experiment types."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
test_cases = [
(ExperimentType.PERFORMANCE, {"test_documents": ["perf.py"]}),
(ExperimentType.EARS_COVERAGE, {"test_documents": ["ears.py"]}),
(ExperimentType.LANGUAGE_DETECTION, {"test_documents": ["lang.py"]}),
(ExperimentType.COMPARATIVE, {"test_documents": ["comp.py"], "baseline_experiment_id": "baseline-123"}),
]
for exp_type, params in test_cases:
config = ExperimentConfig(
experiment_type=exp_type,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
parameters=params,
)
result = await manager.run_experiment(config)
assert result.status == ExperimentStatus.COMPLETED
assert result.metadata["experiment_type"] == exp_type.value
assert result.metadata["parameters"] == params
@pytest.mark.asyncio
async def test_run_experiment_multiple_output_formats(self):
"""Test run_experiment with multiple output formats."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON, OutputFormat.CSV, OutputFormat.YAML],
parameters={"test_documents": ["test.py"]},
)
result = await manager.run_experiment(config)
assert result.status == ExperimentStatus.COMPLETED
expected_formats = ["json", "csv", "yaml"]
assert result.metadata["output_formats"] == expected_formats
@pytest.mark.asyncio
async def test_run_experiment_error_handling(self):
"""Test run_experiment error handling."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
parameters={"test_documents": ["test.py"]},
)
# Mock update_experiment_status to raise an exception
original_update = manager.update_experiment_status
call_count = 0
def mock_update(exp_id, status, **kwargs):
nonlocal call_count
call_count += 1
if call_count == 2: # Second call (COMPLETED status)
raise RuntimeError("Simulated update error")
return original_update(exp_id, status, **kwargs)
manager.update_experiment_status = mock_update
with pytest.raises(ExecutionError, match="Experiment execution failed"):
await manager.run_experiment(config)
# Verify experiment was marked as failed
experiment_id = next(iter(manager._active_experiments.keys()))
failed_result = manager._active_experiments[experiment_id]
assert failed_result.status == ExperimentStatus.FAILED
assert "Simulated update error" in failed_result.error_message
@pytest.mark.asyncio
async def test_run_experiment_status_transitions(self):
"""Test experiment status transitions during execution."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
config = ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
parameters={"test_documents": ["test.py"]},
)
# Track status changes
status_changes = []
original_update = manager.update_experiment_status
def track_status(exp_id, status, **kwargs):
status_changes.append((exp_id, status))
return original_update(exp_id, status, **kwargs)
manager.update_experiment_status = track_status
result = await manager.run_experiment(config)
            # Two status updates after creation (result starts as PENDING): RUNNING, then COMPLETED
            assert len(status_changes) == 2
assert status_changes[0][1] == ExperimentStatus.RUNNING
assert status_changes[1][1] == ExperimentStatus.COMPLETED
assert result.status == ExperimentStatus.COMPLETED
@pytest.mark.asyncio
async def test_run_experiment_concurrent_execution(self):
"""Test multiple concurrent experiment executions."""
with tempfile.TemporaryDirectory() as temp_dir:
manager = ExperimentManager(config_dir=Path(temp_dir))
configs = [
ExperimentConfig(
experiment_type=ExperimentType.PERFORMANCE,
timeout_seconds=300,
memory_limit_mb=100,
output_format=[OutputFormat.JSON],
parameters={"test_documents": [f"test{i}.py"]},
)
for i in range(3)
]
# Execute all experiments concurrently
tasks = [manager.run_experiment(config) for config in configs]
results = await asyncio.gather(*tasks)
# Verify all experiments completed successfully
assert len(results) == 3
for result in results:
assert result.status == ExperimentStatus.COMPLETED
# Verify all have unique IDs
experiment_ids = [result.experiment_id for result in results]
assert len(set(experiment_ids)) == 3
# Verify all are tracked in manager
assert len(manager._active_experiments) == 3
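    # Follow-on sketch combining run_experiment with cleanup_completed_experiments
    # (both exercised above): once concurrent runs complete, cleanup should remove
    # every tracked experiment. Assumes no other experiments are tracked.
    @pytest.mark.asyncio
    async def test_cleanup_after_concurrent_runs(self):
        """All completed concurrent experiments are removed by cleanup."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = ExperimentManager(config_dir=Path(temp_dir))
            config = ExperimentConfig(
                experiment_type=ExperimentType.PERFORMANCE,
                timeout_seconds=300,
                memory_limit_mb=100,
                output_format=[OutputFormat.JSON],
                parameters={"test_documents": ["test.py"]},
            )
            results = await asyncio.gather(*(manager.run_experiment(config) for _ in range(2)))
            assert all(result.status == ExperimentStatus.COMPLETED for result in results)
            # Cleanup removes the completed experiments from the active registry.
            assert manager.cleanup_completed_experiments() == 2
            assert len(manager.list_active_experiments()) == 0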