test_cross_validator.py
""" Comprehensive test suite for the Cross-Model Validation module. This module provides thorough testing of the cross-validation functionality including peer review, adversarial validation, and specialized validators. """ import asyncio import pytest from datetime import datetime from typing import List from unittest.mock import AsyncMock, Mock from src.openrouter_mcp.collective_intelligence.cross_validator import ( CrossValidator, ValidationConfig, ValidationStrategy, ValidationCriteria, ValidationSeverity, ValidationIssue, ValidationReport, ValidationResult, FactCheckValidator, BiasDetectionValidator ) from src.openrouter_mcp.collective_intelligence.base import ( TaskContext, ProcessingResult, ModelInfo, TaskType, QualityMetrics ) class TestValidationIssue: """Test suite for ValidationIssue dataclass.""" @pytest.mark.unit def test_validation_issue_creation(self): """Test ValidationIssue creation and attributes.""" issue = ValidationIssue( issue_id="test_issue_1", criteria=ValidationCriteria.ACCURACY, severity=ValidationSeverity.HIGH, description="Test validation issue", suggestion="Fix the issue", confidence=0.85, evidence="Supporting evidence", validator_model_id="test_validator" ) assert issue.issue_id == "test_issue_1" assert issue.criteria == ValidationCriteria.ACCURACY assert issue.severity == ValidationSeverity.HIGH assert issue.description == "Test validation issue" assert issue.suggestion == "Fix the issue" assert issue.confidence == 0.85 assert issue.evidence == "Supporting evidence" assert issue.validator_model_id == "test_validator" assert issue.line_numbers is None assert isinstance(issue.metadata, dict) class TestValidationConfig: """Test suite for ValidationConfig dataclass.""" @pytest.mark.unit def test_default_validation_config(self): """Test default ValidationConfig initialization.""" config = ValidationConfig() assert config.strategy == ValidationStrategy.PEER_REVIEW assert config.min_validators == 2 assert config.max_validators == 4 assert ValidationCriteria.ACCURACY in config.criteria assert config.confidence_threshold == 0.7 assert config.require_consensus is False assert config.consensus_threshold == 0.6 assert config.include_self_validation is False assert config.timeout_seconds == 30.0 @pytest.mark.unit def test_custom_validation_config(self): """Test ValidationConfig with custom values.""" config = ValidationConfig( strategy=ValidationStrategy.ADVERSARIAL, min_validators=3, max_validators=6, criteria=[ValidationCriteria.FACTUAL_CORRECTNESS, ValidationCriteria.BIAS_NEUTRALITY], confidence_threshold=0.8, require_consensus=True, consensus_threshold=0.75 ) assert config.strategy == ValidationStrategy.ADVERSARIAL assert config.min_validators == 3 assert config.max_validators == 6 assert len(config.criteria) == 2 assert ValidationCriteria.FACTUAL_CORRECTNESS in config.criteria assert config.confidence_threshold == 0.8 assert config.require_consensus is True assert config.consensus_threshold == 0.75 class TestFactCheckValidator: """Test suite for FactCheckValidator.""" @pytest.mark.unit def test_fact_check_validator_initialization(self, mock_model_provider): """Test FactCheckValidator initialization.""" validator = FactCheckValidator(mock_model_provider) assert validator.criteria == ValidationCriteria.FACTUAL_CORRECTNESS assert validator.model_provider == mock_model_provider @pytest.mark.asyncio @pytest.mark.unit async def test_fact_check_validate_success(self, mock_model_provider, sample_task, sample_processing_results): """Test successful fact-checking validation.""" 
validator = FactCheckValidator(mock_model_provider) result = sample_processing_results[0] # Mock fact-check response indicating issues fact_check_response = ProcessingResult( task_id="fact_check_test", model_id="fact_checker", content="Found several factual errors and inaccuracies in the response", confidence=0.9 ) mock_model_provider.process_task.return_value = fact_check_response issues = await validator.validate(result, sample_task, "fact_checker_model") assert isinstance(issues, list) assert len(issues) > 0 # Should find issues based on content for issue in issues: assert isinstance(issue, ValidationIssue) assert issue.criteria == ValidationCriteria.FACTUAL_CORRECTNESS assert issue.validator_model_id == "fact_checker_model" @pytest.mark.asyncio @pytest.mark.unit async def test_fact_check_validate_no_issues(self, mock_model_provider, sample_task, sample_processing_results): """Test fact-checking when no issues are found.""" validator = FactCheckValidator(mock_model_provider) result = sample_processing_results[0] # Mock fact-check response indicating no issues fact_check_response = ProcessingResult( task_id="fact_check_test", model_id="fact_checker", content="The response appears to be factually accurate with no errors detected", confidence=0.9 ) mock_model_provider.process_task.return_value = fact_check_response issues = await validator.validate(result, sample_task, "fact_checker_model") assert isinstance(issues, list) assert len(issues) == 0 # Should find no issues @pytest.mark.asyncio @pytest.mark.unit async def test_fact_check_validate_failure(self, mock_model_provider, sample_task, sample_processing_results): """Test fact-checking when validation fails.""" validator = FactCheckValidator(mock_model_provider) result = sample_processing_results[0] # Mock provider failure mock_model_provider.process_task.side_effect = Exception("Validation failed") issues = await validator.validate(result, sample_task, "fact_checker_model") assert isinstance(issues, list) assert len(issues) == 0 # Should return empty list on failure class TestBiasDetectionValidator: """Test suite for BiasDetectionValidator.""" @pytest.mark.unit def test_bias_detection_validator_initialization(self, mock_model_provider): """Test BiasDetectionValidator initialization.""" validator = BiasDetectionValidator(mock_model_provider) assert validator.criteria == ValidationCriteria.BIAS_NEUTRALITY assert validator.model_provider == mock_model_provider @pytest.mark.asyncio @pytest.mark.unit async def test_bias_detection_validate_bias_found(self, mock_model_provider, sample_task, sample_processing_results): """Test bias detection when bias is found.""" validator = BiasDetectionValidator(mock_model_provider) result = sample_processing_results[0] # Mock bias detection response indicating bias bias_check_response = ProcessingResult( task_id="bias_check_test", model_id="bias_detector", content="The response shows cultural bias and unfair stereotypes in its analysis", confidence=0.8 ) mock_model_provider.process_task.return_value = bias_check_response issues = await validator.validate(result, sample_task, "bias_detector_model") assert isinstance(issues, list) assert len(issues) > 0 # Should find bias issues for issue in issues: assert isinstance(issue, ValidationIssue) assert issue.criteria == ValidationCriteria.BIAS_NEUTRALITY assert issue.validator_model_id == "bias_detector_model" @pytest.mark.asyncio @pytest.mark.unit async def test_bias_detection_validate_no_bias(self, mock_model_provider, sample_task, sample_processing_results): 
"""Test bias detection when no bias is found.""" validator = BiasDetectionValidator(mock_model_provider) result = sample_processing_results[0] # Mock bias detection response indicating no bias bias_check_response = ProcessingResult( task_id="bias_check_test", model_id="bias_detector", content="The response appears neutral and fair with no detectable bias", confidence=0.9 ) mock_model_provider.process_task.return_value = bias_check_response issues = await validator.validate(result, sample_task, "bias_detector_model") assert isinstance(issues, list) assert len(issues) == 0 # Should find no bias issues class TestCrossValidator: """Test suite for the CrossValidator class.""" @pytest.mark.unit def test_cross_validator_initialization(self, mock_model_provider): """Test CrossValidator initialization.""" validator = CrossValidator(mock_model_provider) assert validator.model_provider == mock_model_provider assert isinstance(validator.config, ValidationConfig) assert isinstance(validator.specialized_validators, dict) assert ValidationCriteria.FACTUAL_CORRECTNESS in validator.specialized_validators assert ValidationCriteria.BIAS_NEUTRALITY in validator.specialized_validators assert isinstance(validator.validation_history, list) assert len(validator.validation_history) == 0 @pytest.mark.unit def test_cross_validator_with_custom_config(self, mock_model_provider): """Test CrossValidator with custom configuration.""" config = ValidationConfig( strategy=ValidationStrategy.FACT_CHECK, min_validators=1, max_validators=2 ) validator = CrossValidator(mock_model_provider, config) assert validator.config == config assert validator.config.strategy == ValidationStrategy.FACT_CHECK @pytest.mark.asyncio @pytest.mark.unit async def test_select_validator_models(self, mock_model_provider, sample_models, sample_task, sample_processing_results): """Test validator model selection.""" validator = CrossValidator(mock_model_provider) result = sample_processing_results[0] mock_model_provider.get_available_models.return_value = sample_models validator_models = await validator._select_validator_models(result, sample_task) assert isinstance(validator_models, list) assert len(validator_models) >= validator.config.min_validators assert len(validator_models) <= validator.config.max_validators assert all(isinstance(model_id, str) for model_id in validator_models) @pytest.mark.asyncio @pytest.mark.unit async def test_select_validator_models_exclude_self(self, mock_model_provider, sample_models, sample_task, sample_processing_results): """Test that validator selection excludes the original model when self-validation is disabled.""" config = ValidationConfig(include_self_validation=False) validator = CrossValidator(mock_model_provider, config) result = sample_processing_results[0] mock_model_provider.get_available_models.return_value = sample_models validator_models = await validator._select_validator_models(result, sample_task) # Should not include the original model assert result.model_id not in validator_models @pytest.mark.unit def test_calculate_validator_suitability(self, mock_model_provider, sample_models, sample_task, sample_processing_results): """Test validator suitability calculation.""" validator = CrossValidator(mock_model_provider) model = sample_models[0] result = sample_processing_results[0] suitability = validator._calculate_validator_suitability(model, sample_task, result) assert isinstance(suitability, float) assert 0.0 <= suitability <= 1.0 @pytest.mark.asyncio @pytest.mark.integration async def 
test_peer_review_validation(self, mock_model_provider, sample_task, sample_processing_results): """Test peer review validation strategy.""" validator = CrossValidator(mock_model_provider) result = sample_processing_results[0] # Mock validator models validator_models = ["validator_1", "validator_2"] # Mock validation responses validation_responses = [ ProcessingResult( task_id="peer_review_1", model_id="validator_1", content="The response has some accuracy issues and could be improved", confidence=0.8 ), ProcessingResult( task_id="peer_review_2", model_id="validator_2", content="Overall good response with minor suggestions for clarity", confidence=0.85 ) ] mock_model_provider.process_task.side_effect = validation_responses validation_report = await validator._peer_review_validation(result, sample_task, validator_models) assert isinstance(validation_report, ValidationReport) assert validation_report.validation_strategy == ValidationStrategy.PEER_REVIEW assert validation_report.validator_models == validator_models assert len(validation_report.issues) >= 0 assert 0.0 <= validation_report.overall_score <= 1.0 assert 0.0 <= validation_report.consensus_level <= 1.0 @pytest.mark.asyncio @pytest.mark.integration async def test_adversarial_validation(self, mock_model_provider, sample_task, sample_processing_results): """Test adversarial validation strategy.""" validator = CrossValidator(mock_model_provider) result = sample_processing_results[0] validator_models = ["adversary_1", "adversary_2"] # Mock adversarial responses adversarial_responses = [ ProcessingResult( task_id="adversarial_1", model_id="adversary_1", content="I challenge this response because it has logical flaws and inconsistencies", confidence=0.9 ), ProcessingResult( task_id="adversarial_2", model_id="adversary_2", content="The response seems weak in its argumentation and lacks evidence", confidence=0.85 ) ] mock_model_provider.process_task.side_effect = adversarial_responses validation_report = await validator._adversarial_validation(result, sample_task, validator_models) assert isinstance(validation_report, ValidationReport) assert validation_report.validation_strategy == ValidationStrategy.ADVERSARIAL assert len(validation_report.issues) > 0 # Should find challenges @pytest.mark.asyncio @pytest.mark.integration async def test_consensus_validation(self, mock_model_provider, sample_task, sample_processing_results): """Test consensus validation strategy.""" validator = CrossValidator(mock_model_provider) result = sample_processing_results[0] validator_models = ["consensus_1", "consensus_2"] # Mock alternative responses for consensus alternative_responses = [ ProcessingResult( task_id=sample_task.task_id, model_id="consensus_1", content="Alternative response with similar length and content", confidence=0.8 ), ProcessingResult( task_id=sample_task.task_id, model_id="consensus_2", content="Another alternative response for consensus comparison", confidence=0.85 ) ] mock_model_provider.process_task.side_effect = alternative_responses validation_report = await validator._consensus_validation(result, sample_task, validator_models) assert isinstance(validation_report, ValidationReport) assert validation_report.validation_strategy == ValidationStrategy.CONSENSUS_CHECK assert 0.0 <= validation_report.consensus_level <= 1.0 @pytest.mark.asyncio @pytest.mark.integration async def test_fact_check_validation_strategy(self, mock_model_provider, sample_task, sample_processing_results): """Test fact-check validation strategy.""" config = 
ValidationConfig(strategy=ValidationStrategy.FACT_CHECK) validator = CrossValidator(mock_model_provider, config) result = sample_processing_results[0] validator_models = ["fact_checker"] # Mock fact-check response fact_check_response = ProcessingResult( task_id="fact_check", model_id="fact_checker", content="Found factual errors in the response", confidence=0.9 ) mock_model_provider.process_task.return_value = fact_check_response validation_report = await validator._fact_check_validation(result, sample_task, validator_models) assert isinstance(validation_report, ValidationReport) assert validation_report.validation_strategy == ValidationStrategy.FACT_CHECK @pytest.mark.asyncio @pytest.mark.integration async def test_full_validation_process(self, mock_model_provider, sample_task, sample_processing_results): """Test the complete validation process end-to-end.""" validator = CrossValidator(mock_model_provider) result = sample_processing_results[0] # Mock validation response mock_model_provider.process_task.return_value = ProcessingResult( task_id="validation_test", model_id="validator", content="The response appears to be accurate with minor issues", confidence=0.85 ) validation_result = await validator.process(result, sample_task) assert isinstance(validation_result, ValidationResult) assert validation_result.task_id == sample_task.task_id assert validation_result.original_result == result assert isinstance(validation_result.validation_report, ValidationReport) assert isinstance(validation_result.is_valid, bool) assert 0.0 <= validation_result.validation_confidence <= 1.0 assert isinstance(validation_result.improvement_suggestions, list) assert isinstance(validation_result.quality_metrics, QualityMetrics) assert validation_result.processing_time > 0.0 # Check that validation is stored in history assert len(validator.validation_history) == 1 assert validator.validation_history[0] == validation_result @pytest.mark.unit def test_calculate_criteria_score_no_issues(self, mock_model_provider): """Test criteria score calculation with no issues.""" validator = CrossValidator(mock_model_provider) score = validator._calculate_criteria_score([]) assert score == 1.0 # Perfect score with no issues @pytest.mark.unit def test_calculate_criteria_score_with_issues(self, mock_model_provider): """Test criteria score calculation with various issues.""" validator = CrossValidator(mock_model_provider) issues = [ ValidationIssue( issue_id="test_1", criteria=ValidationCriteria.ACCURACY, severity=ValidationSeverity.HIGH, description="High severity issue", suggestion="Fix it", confidence=0.9, evidence="Evidence", validator_model_id="validator_1" ), ValidationIssue( issue_id="test_2", criteria=ValidationCriteria.ACCURACY, severity=ValidationSeverity.LOW, description="Low severity issue", suggestion="Minor fix", confidence=0.8, evidence="Evidence", validator_model_id="validator_2" ) ] score = validator._calculate_criteria_score(issues) assert 0.0 <= score < 1.0 # Should be reduced due to issues assert isinstance(score, float) @pytest.mark.unit def test_calculate_overall_score(self, mock_model_provider): """Test overall score calculation from criteria scores.""" validator = CrossValidator(mock_model_provider) criteria_scores = { ValidationCriteria.ACCURACY: 0.9, ValidationCriteria.CONSISTENCY: 0.8, ValidationCriteria.COMPLETENESS: 0.85 } overall_score = validator._calculate_overall_score(criteria_scores) assert 0.0 <= overall_score <= 1.0 assert abs(overall_score - 0.85) < 0.01 # Should be close to the average 
@pytest.mark.unit def test_calculate_consensus_level(self, mock_model_provider): """Test consensus level calculation.""" validator = CrossValidator(mock_model_provider) # Issues from different validators issues = [ ValidationIssue("1", ValidationCriteria.ACCURACY, ValidationSeverity.HIGH, "", "", 0.9, "", "validator_1"), ValidationIssue("2", ValidationCriteria.ACCURACY, ValidationSeverity.LOW, "", "", 0.8, "", "validator_1"), ValidationIssue("3", ValidationCriteria.CONSISTENCY, ValidationSeverity.MEDIUM, "", "", 0.85, "", "validator_2") ] validator_models = ["validator_1", "validator_2", "validator_3"] consensus_level = validator._calculate_consensus_level(issues, validator_models) assert 0.0 <= consensus_level <= 1.0 assert isinstance(consensus_level, float) @pytest.mark.unit def test_determine_validity_valid_result(self, mock_model_provider): """Test validity determination for a valid result.""" config = ValidationConfig(confidence_threshold=0.7) validator = CrossValidator(mock_model_provider, config) # Create a validation report with good scores and no critical issues validation_report = ValidationReport( original_result=Mock(), task_context=Mock(), validation_strategy=ValidationStrategy.PEER_REVIEW, validator_models=["validator_1"], issues=[], # No issues overall_score=0.85, # Above threshold criteria_scores={}, consensus_level=0.9, recommendations=[] ) is_valid = validator._determine_validity(validation_report) assert is_valid is True @pytest.mark.unit def test_determine_validity_invalid_result(self, mock_model_provider): """Test validity determination for an invalid result.""" config = ValidationConfig(confidence_threshold=0.7) validator = CrossValidator(mock_model_provider, config) # Create a validation report with critical issues critical_issue = ValidationIssue( issue_id="critical_1", criteria=ValidationCriteria.ACCURACY, severity=ValidationSeverity.CRITICAL, description="Critical error", suggestion="Fix immediately", confidence=0.95, evidence="Strong evidence", validator_model_id="validator_1" ) validation_report = ValidationReport( original_result=Mock(), task_context=Mock(), validation_strategy=ValidationStrategy.PEER_REVIEW, validator_models=["validator_1"], issues=[critical_issue], # Critical issue present overall_score=0.85, criteria_scores={}, consensus_level=0.9, recommendations=[] ) is_valid = validator._determine_validity(validation_report) assert is_valid is False @pytest.mark.unit def test_generate_improvement_suggestions(self, mock_model_provider): """Test improvement suggestions generation.""" validator = CrossValidator(mock_model_provider) # Create validation report with various issues issues = [ ValidationIssue("1", ValidationCriteria.ACCURACY, ValidationSeverity.CRITICAL, "Critical issue", "Fix immediately", 0.95, "", "validator_1"), ValidationIssue("2", ValidationCriteria.CONSISTENCY, ValidationSeverity.HIGH, "High issue", "Review carefully", 0.9, "", "validator_2") ] validation_report = ValidationReport( original_result=Mock(), task_context=Mock(), validation_strategy=ValidationStrategy.PEER_REVIEW, validator_models=["validator_1", "validator_2"], issues=issues, overall_score=0.5, # Low score criteria_scores={}, consensus_level=0.6, # Low consensus recommendations=[] ) suggestions = validator._generate_improvement_suggestions(validation_report) assert isinstance(suggestions, list) assert len(suggestions) > 0 assert any("critical" in suggestion.lower() for suggestion in suggestions) @pytest.mark.unit def test_get_validation_history(self, mock_model_provider): 
"""Test getting validation history.""" validator = CrossValidator(mock_model_provider) # Add some test validation results test_results = [ ValidationResult( task_id=f"task_{i}", original_result=Mock(), validation_report=Mock(), is_valid=True, validation_confidence=0.8, improvement_suggestions=[], quality_metrics=Mock(), processing_time=1.0 ) for i in range(5) ] validator.validation_history = test_results # Test getting full history full_history = validator.get_validation_history() assert len(full_history) == 5 assert full_history == test_results # Test getting limited history limited_history = validator.get_validation_history(limit=3) assert len(limited_history) == 3 assert limited_history == test_results[-3:] @pytest.mark.unit def test_configure_validation(self, mock_model_provider): """Test validation configuration updates.""" validator = CrossValidator(mock_model_provider) original_threshold = validator.config.confidence_threshold original_timeout = validator.config.timeout_seconds validator.configure_validation( confidence_threshold=0.9, timeout_seconds=60.0, min_validators=3 ) assert validator.config.confidence_threshold == 0.9 assert validator.config.timeout_seconds == 60.0 assert validator.config.min_validators == 3 assert validator.config.confidence_threshold != original_threshold assert validator.config.timeout_seconds != original_timeout @pytest.mark.asyncio @pytest.mark.performance async def test_validation_performance(self, performance_mock_provider, sample_task): """Test validation performance with multiple validators.""" validator = CrossValidator(performance_mock_provider) # Create a test result test_result = ProcessingResult( task_id=sample_task.task_id, model_id="test_model", content="Test content for validation", confidence=0.8 ) start_time = datetime.now() validation_result = await validator.process(test_result, sample_task) end_time = datetime.now() processing_time = (end_time - start_time).total_seconds() # Should complete validation within reasonable time assert processing_time < 5.0 # 5 seconds max assert isinstance(validation_result, ValidationResult) @pytest.mark.asyncio @pytest.mark.edge_case async def test_validation_with_no_available_models(self, mock_model_provider, sample_task, sample_processing_results): """Test validation when no models are available.""" mock_model_provider.get_available_models.return_value = [] validator = CrossValidator(mock_model_provider) result = sample_processing_results[0] # Should handle gracefully or raise appropriate error with pytest.raises(Exception): await validator.process(result, sample_task) @pytest.mark.asyncio @pytest.mark.integration async def test_concurrent_validations(self, mock_model_provider, sample_task): """Test concurrent validation requests.""" validator = CrossValidator(mock_model_provider) # Create multiple test results test_results = [ ProcessingResult( task_id=f"concurrent_task_{i}", model_id=f"model_{i}", content=f"Test content {i}", confidence=0.8 ) for i in range(3) ] # Mock validation responses mock_model_provider.process_task.return_value = ProcessingResult( task_id="validation", model_id="validator", content="Validation response", confidence=0.8 ) # Run validations concurrently validation_results = await asyncio.gather( *[validator.process(result, sample_task) for result in test_results], return_exceptions=True ) # All should succeed assert len(validation_results) == 3 assert all(isinstance(result, ValidationResult) for result in validation_results) assert len(set(result.task_id for result in 
validation_results)) == 3 # All unique
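The fixtures requested throughout this file (mock_model_provider, sample_task, sample_processing_results, sample_models, performance_mock_provider) are not defined here; they would normally be supplied by a shared conftest.py. The sketch below is a minimal, hypothetical version of such a conftest, using only the constructor arguments and mock behaviours that the tests above actually exercise; the real fixtures in the repository may differ.

# conftest.py (hypothetical sketch -- field names beyond what the tests use are assumptions)
import pytest
from unittest.mock import AsyncMock, Mock

from src.openrouter_mcp.collective_intelligence.base import ProcessingResult


@pytest.fixture
def sample_models():
    # ModelInfo's real constructor is not visible in this file, so plain mocks
    # with a model_id attribute stand in for model metadata objects.
    models = []
    for i in range(4):
        model = Mock()
        model.model_id = f"model_{i}"
        models.append(model)
    return models


@pytest.fixture
def mock_model_provider(sample_models):
    # Async provider double; individual tests override process_task and
    # get_available_models (return_value / side_effect) as needed.
    provider = AsyncMock()
    provider.get_available_models.return_value = sample_models
    provider.process_task.return_value = ProcessingResult(
        task_id="default",
        model_id="validator",
        content="Default validation response",
        confidence=0.8,
    )
    return provider


@pytest.fixture
def sample_task():
    # Only task_id is asserted on directly in the tests above; a Mock keeps
    # the sketch independent of TaskContext's real signature.
    task = Mock()
    task.task_id = "sample_task_1"
    return task


@pytest.fixture
def sample_processing_results():
    # ProcessingResult is constructed with these keyword arguments throughout
    # the tests, so the same shape is reused here.
    return [
        ProcessingResult(
            task_id="sample_task_1",
            model_id=f"model_{i}",
            content=f"Sample response {i}",
            confidence=0.8,
        )
        for i in range(3)
    ]


@pytest.fixture
def performance_mock_provider(mock_model_provider):
    # The performance test only checks wall-clock time and result types, so
    # the fast canned responses of the basic provider are sufficient.
    return mock_model_provider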
