MCP News Collector

test_rumor_detector.py•27.1 kB

"""Tests for rumor detection system."""

import pytest
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from datetime import datetime, timezone, timedelta
from typing import Dict, List, Any

from src.analysis.rumor_detector import (
    RumorDetector,
    RumorResult,
    RumorScore,
    RumorType,
    RumorError,
    SourceReliability,
    ContentPattern,
    PropagationMetrics
)


class TestRumorDetector:
    """Test cases for RumorDetector.

    Every test receives a fresh ``RumorDetector`` through the ``detector``
    fixture.  Most tests only assert on the *shape* of what the detector
    returns (types, required keys, value ranges) rather than on exact values.
    """

    @pytest.fixture
    def detector(self):
        """Create RumorDetector instance for testing."""
        return RumorDetector()

    @pytest.fixture
    def sample_news_item(self):
        """Sample news item for testing (anonymous source, unverified claim)."""
        return {
            "id": "news_123",
            "title": "삼성전자 주가 급등, 비공개 정보 유출?",
            "content": "익명의 소식통에 따르면 삼성전자가 곧 혁신적인 발표를 할 예정이라고 합니다. 이 정보는 아직 공식 확인되지 않았습니다.",
            "url": "https://example.com/news/123",
            "source": "unknown_blog",
            "author": "anonymous",
            "published_at": datetime.now(timezone.utc),
            "social_shares": 1500,
            "view_count": 5000,
            "comments_count": 200
        }

    @pytest.fixture
    def sample_reliable_news(self):
        """Sample reliable news for comparison (official press release)."""
        return {
            "id": "news_456",
            "title": "삼성전자 3분기 실적 발표",
            "content": "삼성전자가 공식 보도자료를 통해 3분기 실적을 발표했습니다. 매출은 전년 동기 대비 10% 증가했습니다.",
            "url": "https://samsung.com/press/456",
            "source": "samsung_official",
            "author": "삼성전자 IR팀",
            "published_at": datetime.now(timezone.utc),
            "social_shares": 500,
            "view_count": 10000,
            "comments_count": 50
        }

    def test_rumor_detector_initialization(self, detector):
        """Test RumorDetector initialization."""
        assert detector is not None
        assert hasattr(detector, 'analyze_rumor')
        assert hasattr(detector, 'detect_patterns')
        assert hasattr(detector, 'check_source_reliability')

    @pytest.mark.asyncio
    async def test_analyze_rumor_basic(self, detector, sample_news_item):
        """Test basic rumor analysis."""
        result = await detector.analyze_rumor(sample_news_item)

        assert isinstance(result, RumorResult)
        assert result.news_id == "news_123"
        assert isinstance(result.rumor_score, float)
        assert 0.0 <= result.rumor_score <= 1.0
        assert result.rumor_type in [t.value for t in RumorType]
        assert isinstance(result.confidence, float)

    @pytest.mark.asyncio
    async def test_source_reliability_check(self, detector):
        """Test source reliability checking."""
        # Test unreliable source
        unreliable_result = await detector.check_source_reliability("unknown_blog")
        assert isinstance(unreliable_result, SourceReliability)
        assert unreliable_result.reliability_score < 0.5

        # Test reliable source
        reliable_result = await detector.check_source_reliability("samsung_official")
        assert reliable_result.reliability_score > 0.7

    @pytest.mark.asyncio
    async def test_content_pattern_detection(self, detector, sample_news_item):
        """Test content pattern detection for rumors."""
        patterns = await detector.detect_patterns(sample_news_item["content"])

        assert isinstance(patterns, list)
        for pattern in patterns:
            assert isinstance(pattern, ContentPattern)
            assert hasattr(pattern, 'pattern_type')
            assert hasattr(pattern, 'confidence')
            assert hasattr(pattern, 'evidence')

    @pytest.mark.asyncio
    async def test_anonymous_source_detection(self, detector):
        """Test detection of anonymous sources."""
        anonymous_texts = [
            "익명의 소식통에 따르면",
            "내부 관계자가 밝힌 바에 따르면",
            "한 관계자는 말했다",
            "소식통에 의하면"
        ]

        for text in anonymous_texts:
            patterns = await detector.detect_patterns(text)
            anonymous_patterns = [p for p in patterns if p.pattern_type == "anonymous_source"]
            # The message pinpoints which sample failed inside the loop.
            assert len(anonymous_patterns) > 0, f"no anonymous_source pattern for {text!r}"

    @pytest.mark.asyncio
    async def test_unverified_claim_detection(self, detector):
        """Test detection of unverified claims."""
        unverified_texts = [
            "아직 공식 확인되지 않았습니다",
            "확인되지 않은 정보입니다",
            "추측에 불과합니다",
            "루머로 알려져 있습니다"
        ]

        for text in unverified_texts:
            patterns = await detector.detect_patterns(text)
            unverified_patterns = [p for p in patterns if p.pattern_type == "unverified_claim"]
            assert len(unverified_patterns) > 0, f"no unverified_claim pattern for {text!r}"

    @pytest.mark.asyncio
    async def test_sensational_language_detection(self, detector):
        """Test detection of sensational language."""
        sensational_texts = [
            "충격적인 사실이 밝혀졌습니다!",
            "놀라운 비밀이 폭로되었습니다",
            "긴급속보! 대박 소식!",
            "이것은 정말 대단한 일입니다!!!"
        ]

        for text in sensational_texts:
            patterns = await detector.detect_patterns(text)
            sensational_patterns = [p for p in patterns if p.pattern_type == "sensational_language"]
            assert len(sensational_patterns) > 0, f"no sensational_language pattern for {text!r}"

    @pytest.mark.asyncio
    async def test_vague_language_detection(self, detector):
        """Test detection of vague language patterns."""
        vague_texts = [
            "어떤 사람들은 말한다",
            "일부에서는 추측하고 있다",
            "~라는 말이 있다",
            "~일 가능성이 있다고 한다"
        ]

        for text in vague_texts:
            patterns = await detector.detect_patterns(text)
            vague_patterns = [p for p in patterns if p.pattern_type == "vague_language"]
            assert len(vague_patterns) > 0, f"no vague_language pattern for {text!r}"

    @pytest.mark.asyncio
    async def test_rumor_type_classification(self, detector):
        """Test rumor type classification.

        ``expected_type`` documents the intended label for each sample but is
        deliberately not asserted; only the validity of the returned type is
        checked.
        """
        test_cases = [
            {
                "content": "삼성전자가 애플을 인수한다는 소문이 돌고 있습니다",
                "expected_type": RumorType.MISINFORMATION
            },
            {
                "content": "코로나19 백신이 5G와 연결되어 있다는 가짜 뉴스",
                "expected_type": RumorType.CONSPIRACY
            },
            {
                "content": "A회사 주가가 내일 폭등할 예정이라는 찌라시",
                "expected_type": RumorType.MARKET_MANIPULATION
            },
            {
                "content": "정치인 A가 비리에 연루되었다는 근거없는 주장",
                "expected_type": RumorType.DEFAMATION
            }
        ]

        valid_types = [t.value for t in RumorType]
        for case in test_cases:
            news_item = {"content": case["content"], "id": "test"}
            result = await detector.analyze_rumor(news_item)
            # Classification might not be perfect, so we just check it's a valid type
            assert result.rumor_type in valid_types, f"invalid rumor type for {case['content']!r}"

    @pytest.mark.asyncio
    async def test_propagation_analysis(self, detector, sample_news_item):
        """Test analysis of rumor propagation patterns."""
        propagation = await detector.analyze_propagation(sample_news_item)

        assert isinstance(propagation, PropagationMetrics)
        assert hasattr(propagation, 'viral_coefficient')
        assert hasattr(propagation, 'spread_velocity')
        assert hasattr(propagation, 'engagement_ratio')
        assert hasattr(propagation, 'bot_activity_score')

    @pytest.mark.asyncio
    async def test_cross_reference_verification(self, detector):
        """Test cross-referencing with reliable sources."""
        news_item = {
            "id": "test_news",
            "title": "삼성전자 CEO 교체 루머",
            "content": "삼성전자 CEO가 교체될 것이라는 소문이 돌고 있습니다.",
            "entities": {"companies": ["삼성전자"]}
        }

        verification_result = await detector.cross_reference_verification(news_item)

        assert "verification_score" in verification_result
        assert "matching_sources" in verification_result
        assert "contradiction_sources" in verification_result
        assert isinstance(verification_result["verification_score"], float)

    @pytest.mark.asyncio
    async def test_temporal_consistency_check(self, detector):
        """Test temporal consistency of news items."""
        # Create timeline of related news
        news_timeline = [
            {
                "timestamp": datetime.now(timezone.utc) - timedelta(hours=5),
                "content": "삼성전자 실적 발표 예정",
                "sentiment": "neutral"
            },
            {
                "timestamp": datetime.now(timezone.utc) - timedelta(hours=3),
                "content": "삼성전자 주가 급등 전망",
                "sentiment": "positive"
            },
            {
                "timestamp": datetime.now(timezone.utc) - timedelta(hours=1),
                "content": "삼성전자 비밀 프로젝트 소문",
                "sentiment": "speculative"
            }
        ]

        consistency = await detector.check_temporal_consistency(news_timeline)

        assert "consistency_score" in consistency
        assert "anomalies" in consistency
        assert "trend_analysis" in consistency

    @pytest.mark.asyncio
    async def test_linguistic_analysis(self, detector):
        """Test linguistic analysis for rumor detection."""
        test_texts = [
            "확실한 정보에 따르면 삼성전자가...",  # High certainty
            "아마도 삼성전자가... 일 것 같습니다",  # Low certainty
            "들리는 바에 따르면 삼성전자가...",  # Hearsay
            "공식 발표에 따르면 삼성전자가..."  # Official
        ]

        for text in test_texts:
            analysis = await detector.analyze_linguistic_features(text)

            assert "certainty_score" in analysis, f"missing certainty_score for {text!r}"
            assert "formality_score" in analysis, f"missing formality_score for {text!r}"
            assert "objectivity_score" in analysis, f"missing objectivity_score for {text!r}"
            assert all(0 <= score <= 1 for score in analysis.values()), (
                f"score out of [0, 1] for {text!r}"
            )

    @pytest.mark.asyncio
    async def test_social_signals_analysis(self, detector):
        """Test analysis of social media signals."""
        social_data = {
            "shares": 5000,
            "likes": 15000,
            "comments": 1200,
            "retweets": 3000,
            "mentions": 800,
            "hashtags": ["#삼성전자", "#주식", "#속보"],
            "user_types": {
                "verified": 50,
                "regular": 4800,
                "suspicious": 150
            }
        }

        signals = await detector.analyze_social_signals(social_data)

        assert "viral_score" in signals
        assert "authenticity_score" in signals
        assert "bot_probability" in signals
        assert "engagement_quality" in signals

    @pytest.mark.asyncio
    async def test_fact_checking_integration(self, detector):
        """Test integration with fact-checking services."""
        claim = "삼성전자가 애플을 인수한다"

        # The external fact-check call is replaced so no network is needed.
        with patch.object(detector, '_call_fact_check_api') as mock_api:
            mock_api.return_value = {
                "fact_check_result": "false",
                "confidence": 0.95,
                "sources": ["reuters", "bloomberg"],
                "explanation": "No evidence found"
            }

            fact_check = await detector.check_facts(claim)

            assert fact_check["fact_check_result"] == "false"
            assert fact_check["confidence"] == 0.95
            mock_api.assert_called_once_with(claim)

    @pytest.mark.asyncio
    async def test_rumor_score_calculation(self, detector):
        """Test rumor score calculation methodology."""
        # High rumor score case
        high_rumor_news = {
            "source": "unknown_blog",
            "author": "anonymous",
            "content": "익명의 소식통에 따르면 충격적인 비밀이 곧 폭로됩니다!!!",
            "social_shares": 10000,
            "verification_score": 0.1
        }

        # Low rumor score case
        low_rumor_news = {
            "source": "official_press",
            "author": "PR Team",
            "content": "공식 보도자료에 따르면 회사가 정기 실적을 발표했습니다.",
            "social_shares": 500,
            "verification_score": 0.9
        }

        high_result = await detector.calculate_rumor_score(high_rumor_news)
        low_result = await detector.calculate_rumor_score(low_rumor_news)

        assert high_result > low_result
        assert high_result > 0.6  # Should be high rumor score (adjusted threshold)
        assert low_result < 0.4  # Should be low rumor score (adjusted threshold)

    @pytest.mark.asyncio
    async def test_rumor_cluster_detection(self, detector):
        """Test detection of rumor clusters."""
        similar_rumors = [
            {"id": "1", "content": "삼성전자 CEO 교체 소문", "timestamp": datetime.now(timezone.utc)},
            {"id": "2", "content": "삼성전자 최고경영자 변경 루머", "timestamp": datetime.now(timezone.utc)},
            {"id": "3", "content": "삼성전자 리더십 교체설", "timestamp": datetime.now(timezone.utc)},
            {"id": "4", "content": "LG전자 신제품 출시", "timestamp": datetime.now(timezone.utc)}  # Different topic
        ]

        clusters = await detector.detect_rumor_clusters(similar_rumors)

        assert len(clusters) >= 1
        # Check if clustering works (might not cluster all similar items due to algorithm)
        total_items = sum(len(cluster) for cluster in clusters)
        assert total_items == 4  # All items should be in some cluster

    @pytest.mark.asyncio
    async def test_confidence_score_calculation(self, detector):
        """Test confidence score calculation."""
        # High confidence case (clear patterns)
        clear_case = {
            "content": "익명의 소식통이 밝힌 확인되지 않은 충격적인 비밀!!!",
            "source_reliability": 0.1,
            "pattern_matches": 5
        }

        # Low confidence case (ambiguous)
        ambiguous_case = {
            "content": "회사가 새로운 계획을 발표할 예정입니다.",
            "source_reliability": 0.6,
            "pattern_matches": 1
        }

        clear_confidence = await detector.calculate_confidence(clear_case)
        ambiguous_confidence = await detector.calculate_confidence(ambiguous_case)

        assert clear_confidence > ambiguous_confidence
        assert 0.0 <= clear_confidence <= 1.0
        assert 0.0 <= ambiguous_confidence <= 1.0

    @pytest.mark.asyncio
    async def test_real_time_rumor_tracking(self, detector):
        """Test real-time rumor tracking and updates."""
        rumor_id = "rumor_123"
        initial_data = {
            "content": "삼성전자 관련 소문",
            "social_shares": 100,
            "timestamp": datetime.now(timezone.utc)
        }

        # Start tracking
        await detector.start_rumor_tracking(rumor_id, initial_data)

        # Update with new data
        updated_data = {
            "social_shares": 1000,  # Increased shares
            "new_sources": 5
        }

        tracking_result = await detector.update_rumor_tracking(rumor_id, updated_data)

        assert "spread_rate" in tracking_result
        assert "escalation_level" in tracking_result
        assert tracking_result["spread_rate"] > 0

    @pytest.mark.asyncio
    async def test_rumor_mitigation_recommendations(self, detector):
        """Test generation of rumor mitigation recommendations."""
        high_risk_rumor = {
            "rumor_score": 0.9,
            "rumor_type": RumorType.MARKET_MANIPULATION.value,
            "viral_coefficient": 2.5,
            "entities": {"companies": ["삼성전자"]}
        }

        recommendations = await detector.generate_mitigation_recommendations(high_risk_rumor)

        assert isinstance(recommendations, list)
        assert len(recommendations) > 0
        for rec in recommendations:
            assert "action" in rec
            assert "priority" in rec
            assert "rationale" in rec

    @pytest.mark.asyncio
    async def test_historical_rumor_analysis(self, detector):
        """Test analysis of historical rumor patterns."""
        historical_data = [
            {
                "date": "2024-01-01",
                "rumor_type": "market_manipulation",
                "target_entity": "삼성전자",
                "outcome": "debunked",
                "spread_time": 48  # hours
            },
            {
                "date": "2024-01-15",
                "rumor_type": "misinformation",
                "target_entity": "삼성전자",
                "outcome": "partially_true",
                "spread_time": 72
            }
        ]

        analysis = await detector.analyze_historical_patterns(historical_data)

        assert "common_patterns" in analysis
        assert "seasonal_trends" in analysis
        assert "target_analysis" in analysis
        assert "effectiveness_metrics" in analysis

    @pytest.mark.asyncio
    async def test_multi_language_rumor_detection(self, detector):
        """Test rumor detection in multiple languages."""
        multilingual_texts = {
            "korean": "익명의 소식통에 따르면 충격적인 발표가 있을 예정입니다",
            "english": "According to anonymous sources, there will be a shocking announcement",
            "mixed": "Anonymous source says 삼성전자 will make 충격적인 발표"
        }

        for lang, text in multilingual_texts.items():
            patterns = await detector.detect_patterns(text)
            # Should detect at least some patterns regardless of language
            assert len(patterns) > 0, f"no patterns detected for {lang} text"

    @pytest.mark.asyncio
    async def test_rumor_verification_workflow(self, detector):
        """Test complete rumor verification workflow."""
        news_item = {
            "id": "workflow_test",
            "title": "삼성전자 관련 중요 발표",
            "content": "내부 관계자에 따르면 삼성전자가 곧 중대 발표를 할 예정이라고 합니다.",
            "source": "tech_blog",
            "social_metrics": {"shares": 2000, "engagement": 5000}
        }

        workflow_result = await detector.run_verification_workflow(news_item)

        assert "rumor_analysis" in workflow_result
        assert "source_check" in workflow_result
        assert "fact_verification" in workflow_result
        assert "final_assessment" in workflow_result
        assert "recommendations" in workflow_result

    @pytest.mark.asyncio
    async def test_rumor_impact_assessment(self, detector):
        """Test assessment of rumor impact on markets/reputation."""
        market_rumor = {
            "content": "삼성전자 주가 조작 의혹",
            "rumor_type": RumorType.MARKET_MANIPULATION.value,
            "target_entities": ["삼성전자"],
            "reach": 100000,
            "credibility": 0.3
        }

        impact = await detector.assess_rumor_impact(market_rumor)

        assert "market_impact_score" in impact
        assert "reputation_damage_score" in impact
        assert "investor_sentiment_effect" in impact
        assert "estimated_financial_impact" in impact

    @pytest.mark.asyncio
    async def test_automated_response_system(self, detector):
        """Test automated response system for detected rumors."""
        critical_rumor = {
            "rumor_score": 0.95,
            "confidence": 0.9,
            "rumor_type": RumorType.MARKET_MANIPULATION.value,
            "viral_coefficient": 3.0
        }

        response = await detector.generate_automated_response(critical_rumor)

        assert "response_type" in response
        assert "urgency_level" in response
        assert "suggested_actions" in response
        assert "notification_targets" in response

    @pytest.mark.asyncio
    async def test_rumor_network_analysis(self, detector):
        """Test analysis of rumor propagation networks."""
        propagation_data = {
            "initial_source": "user_123",
            "propagation_path": [
                {"user": "user_123", "followers": 1000, "timestamp": datetime.now(timezone.utc)},
                {"user": "user_456", "followers": 5000, "timestamp": datetime.now(timezone.utc)},
                {"user": "user_789", "followers": 50000, "timestamp": datetime.now(timezone.utc)}
            ],
            "amplifiers": ["bot_network_1", "suspicious_account_2"]
        }

        network_analysis = await detector.analyze_propagation_network(propagation_data)

        assert "network_structure" in network_analysis
        assert "key_amplifiers" in network_analysis
        assert "suspicious_patterns" in network_analysis
        assert "intervention_points" in network_analysis

    @pytest.mark.asyncio
    async def test_rumor_evolution_tracking(self, detector):
        """Test tracking of how rumors evolve over time."""
        rumor_versions = [
            {
                "version": 1,
                "content": "삼성전자에 문제가 있다",
                "timestamp": datetime.now(timezone.utc) - timedelta(hours=6)
            },
            {
                "version": 2,
                "content": "삼성전자에 큰 문제가 발생했다",
                "timestamp": datetime.now(timezone.utc) - timedelta(hours=4)
            },
            {
                "version": 3,
                "content": "삼성전자에 치명적 문제가 발생해 주가가 폭락할 것이다",
                "timestamp": datetime.now(timezone.utc) - timedelta(hours=2)
            }
        ]

        evolution = await detector.track_rumor_evolution(rumor_versions)

        assert "escalation_pattern" in evolution
        assert "content_drift" in evolution
        assert "amplification_factor" in evolution
        assert "mutation_points" in evolution

    @pytest.mark.asyncio
    async def test_performance_optimization(self, detector):
        """Test performance optimization for batch rumor detection."""
        news_batch = [
            {"id": f"news_{i}", "content": f"뉴스 내용 {i}", "source": "test"}
            for i in range(100)
        ]

        import time

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        results = await detector.analyze_batch_rumors(news_batch)
        elapsed = time.perf_counter() - start_time

        assert len(results) == 100
        # Should complete batch processing efficiently
        assert elapsed < 10, f"batch took {elapsed:.2f}s"  # Less than 10 seconds for 100 items

    @pytest.mark.asyncio
    async def test_error_handling(self, detector):
        """Test error handling in rumor detection."""
        # Test with invalid input
        with pytest.raises(RumorError):
            await detector.analyze_rumor(None)

        # Test with missing required fields
        incomplete_news = {"title": "test"}
        result = await detector.analyze_rumor(incomplete_news)
        assert 0.0 <= result.rumor_score <= 1.0  # Should handle gracefully

    def test_rumor_result_dataclass(self):
        """Test RumorResult dataclass."""
        result = RumorResult(
            news_id="test_123",
            rumor_score=0.8,
            rumor_type=RumorType.MISINFORMATION.value,
            confidence=0.9,
            evidence=["anonymous_source", "unverified_claim"],
            source_reliability=SourceReliability(
                source_name="test_source",
                reliability_score=0.3,
                reputation_score=0.2
            )
        )

        assert result.news_id == "test_123"
        assert result.rumor_score == 0.8
        assert result.rumor_type == RumorType.MISINFORMATION.value
        assert len(result.evidence) == 2

    @pytest.mark.asyncio
    async def test_rumor_detection_integration(self, detector):
        """Test integration with other analysis systems."""
        # Mock integration with sentiment analyzer
        with patch('src.analysis.sentiment_analyzer.SentimentAnalyzer') as mock_sentiment:
            mock_sentiment.return_value.analyze.return_value.sentiment = "negative"
            mock_sentiment.return_value.analyze.return_value.confidence = 0.8

            news_item = {"content": "부정적인 루머 내용", "id": "integration_test"}
            result = await detector.analyze_with_sentiment_integration(news_item)

            assert "sentiment_correlation" in result
            assert "integrated_score" in result

    @pytest.mark.asyncio
    async def test_rumor_database_operations(self, detector):
        """Test database operations for rumor storage and retrieval."""
        rumor_data = {
            "news_id": "db_test_123",
            "rumor_score": 0.7,
            "detected_at": datetime.now(timezone.utc),
            "status": "under_investigation"
        }

        # Test save
        save_result = await detector.save_rumor_detection(rumor_data)
        assert save_result is True

        # Test retrieve
        retrieved = await detector.get_rumor_detection("db_test_123")
        assert retrieved["news_id"] == "db_test_123"
        assert retrieved["rumor_score"] == 0.7

    @pytest.mark.asyncio
    async def test_rumor_alert_system(self, detector):
        """Test rumor alert system for high-risk detections."""
        high_risk_rumor = {
            "rumor_score": 0.95,
            "rumor_type": RumorType.MARKET_MANIPULATION.value,
            "target_entities": ["삼성전자"],
            "viral_coefficient": 4.0
        }

        # Should trigger alert
        alert_triggered = await detector.check_alert_conditions(high_risk_rumor)
        assert alert_triggered is True

        # Test alert generation
        alert = await detector.generate_rumor_alert(high_risk_rumor)
        assert "alert_level" in alert
        assert "message" in alert
        assert "recommended_actions" in alert

Loading blob content...

Latest Blog Posts

How to Test MCP Streamable HTTP Endpoints Using cURL
By punkpeye on January 2, 2026.
tutorial
bash
What is Streamable HTTP in MCP?
By punkpeye on January 2, 2026.
Streamable HTTP
What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/whdghk1907/mcp-news-collector'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_rumor_detector.py•27.1 kB