"""Tests for sentiment analyzer."""
import pytest
from unittest.mock import Mock, AsyncMock, patch
from datetime import datetime, timezone
from src.analysis.sentiment_analyzer import SentimentAnalyzer, SentimentResult, SentimentError
class TestSentimentAnalyzer:
    """Test cases for SentimentAnalyzer.

    The suite exercises the analyzer's public surface as used below:
    single-text and batch analysis, keyword and entity extraction,
    context/time-weighted/trend/aspect analysis, custom rules, thresholds,
    persistence, and error handling. Async tests are marked with
    ``pytest.mark.asyncio``.
    """

    # Sentiment labels accepted by the basic analysis tests. A tuple is used
    # so membership is checked with ``==`` (no hashability requirement).
    VALID_SENTIMENTS = ("positive", "negative", "neutral")

    @pytest.fixture
    def analyzer(self):
        """Create a fresh SentimentAnalyzer instance for each test."""
        return SentimentAnalyzer()

    @pytest.fixture
    def sample_news(self):
        """Return a sample news article dict used by the news-analysis test."""
        return {
            "title": "삼성전자 주가 급등, 신제품 출시 기대감",
            "content": "삼성전자가 혁신적인 신제품 출시를 앞두고 있어 투자자들의 기대감이 높아지고 있다. 주가는 전일 대비 5% 상승했다.",
            "url": "https://example.com/news/1",
            "published_at": datetime.now(timezone.utc),
            "source": "test",
        }

    def test_sentiment_analyzer_initialization(self, analyzer):
        """The analyzer can be constructed and exposes its core entry points."""
        assert analyzer is not None
        assert hasattr(analyzer, 'analyze')
        assert hasattr(analyzer, 'analyze_batch')

    @pytest.mark.asyncio
    async def test_analyze_positive_sentiment(self, analyzer):
        """Analyzing positive-leaning text yields a well-formed result."""
        text = "삼성전자 주가가 크게 상승했습니다. 실적이 매우 좋아 투자자들이 기뻐하고 있습니다."
        result = await analyzer.analyze(text)

        assert isinstance(result, SentimentResult)
        assert result.sentiment in self.VALID_SENTIMENTS
        assert 0 <= result.score <= 1
        assert result.confidence >= 0

    @pytest.mark.asyncio
    async def test_analyze_negative_sentiment(self, analyzer):
        """Analyzing negative-leaning text yields a well-formed result."""
        text = "주가가 폭락했습니다. 실적이 나빠서 투자자들이 실망하고 있습니다. 전망이 매우 어둡습니다."
        result = await analyzer.analyze(text)

        assert isinstance(result, SentimentResult)
        assert result.sentiment in self.VALID_SENTIMENTS
        assert 0 <= result.score <= 1

    @pytest.mark.asyncio
    async def test_analyze_neutral_sentiment(self, analyzer):
        """Analyzing neutral-leaning text yields a well-formed result."""
        text = "오늘 코스피 지수는 전일과 비슷한 수준에서 마감했습니다. 거래량은 평균 수준입니다."
        result = await analyzer.analyze(text)

        assert isinstance(result, SentimentResult)
        assert result.sentiment in self.VALID_SENTIMENTS

    @pytest.mark.asyncio
    async def test_analyze_news_article(self, analyzer, sample_news):
        """``analyze_news`` returns a dict with the expected top-level keys."""
        result = await analyzer.analyze_news(sample_news)

        assert isinstance(result, dict)
        for key in (
            "title_sentiment",
            "content_sentiment",
            "overall_sentiment",
            "keywords",
        ):
            assert key in result, f"missing key {key!r} in analyze_news result"

    @pytest.mark.asyncio
    async def test_analyze_batch(self, analyzer):
        """``analyze_batch`` returns one SentimentResult per input text."""
        texts = [
            "주가가 상승했습니다. 좋은 소식입니다.",
            "실적이 나빠졌습니다. 주가가 하락했습니다.",
            "시장은 보합세를 유지했습니다.",
        ]
        results = await analyzer.analyze_batch(texts)

        assert len(results) == 3
        for result in results:
            assert isinstance(result, SentimentResult)

    @pytest.mark.asyncio
    async def test_extract_keywords(self, analyzer):
        """``extract_keywords`` returns a non-empty list of 2-tuples."""
        text = "삼성전자와 SK하이닉스가 반도체 시장에서 경쟁하고 있습니다. 메모리 반도체 가격이 상승하고 있습니다."
        keywords = await analyzer.extract_keywords(text)

        assert isinstance(keywords, list)
        assert len(keywords) > 0
        # Each entry is expected to be a (keyword, score) pair.
        assert all(isinstance(kw, tuple) and len(kw) == 2 for kw in keywords)

    @pytest.mark.asyncio
    async def test_detect_financial_entities(self, analyzer):
        """``detect_financial_entities`` returns a dict with all entity groups."""
        text = "삼성전자(005930)와 카카오(035720)가 상승세를 보였습니다. 코스피는 2,500포인트를 돌파했습니다."
        entities = await analyzer.detect_financial_entities(text)

        assert isinstance(entities, dict)
        for group in ("companies", "stock_codes", "indices", "currencies"):
            assert group in entities, f"missing entity group {group!r}"

    def test_calculate_sentiment_score(self, analyzer):
        """Word-level scores are positive for bullish and negative for bearish words."""
        positive_words = ["상승", "호재", "기대", "성장", "이익"]
        negative_words = ["하락", "악재", "우려", "손실", "위기"]

        for word in positive_words:
            score = analyzer._calculate_word_sentiment(word)
            assert score > 0, f"expected positive score for {word!r}, got {score!r}"

        for word in negative_words:
            score = analyzer._calculate_word_sentiment(word)
            assert score < 0, f"expected negative score for {word!r}, got {score!r}"

    @pytest.mark.asyncio
    async def test_analyze_market_terms(self, analyzer):
        """Market-specific recommendation phrases produce a valid label."""
        market_texts = [
            "매수 추천, 목표가 상향",
            "매도 추천, 투자의견 하향",
            "중립 유지, 관망 필요",
        ]
        for text in market_texts:
            result = await analyzer.analyze(text)
            assert result.sentiment in self.VALID_SENTIMENTS, (
                f"unexpected sentiment {result.sentiment!r} for {text!r}"
            )

    @pytest.mark.asyncio
    async def test_handle_empty_text(self, analyzer):
        """Empty, whitespace-only, and None inputs map to neutral with score 0.5."""
        empty_texts = ["", " ", None]
        for text in empty_texts:
            result = await analyzer.analyze(text)
            assert result.sentiment == "neutral", f"input {text!r}"
            assert result.score == 0.5, f"input {text!r}"

    @pytest.mark.asyncio
    async def test_handle_very_long_text(self, analyzer):
        """A very long input is analyzed without raising."""
        long_text = "주가가 상승했습니다. " * 1000  # Very long text
        result = await analyzer.analyze(long_text)

        assert isinstance(result, SentimentResult)

    def test_sentiment_result_dataclass(self):
        """SentimentResult stores the fields it is constructed with."""
        result = SentimentResult(
            sentiment="positive",
            score=0.8,
            confidence=0.9,
            keywords=["상승", "호재"],
            entities={"companies": ["삼성전자"]},
        )

        assert result.sentiment == "positive"
        assert result.score == 0.8
        assert result.confidence == 0.9
        assert "상승" in result.keywords
        assert "삼성전자" in result.entities["companies"]

    @pytest.mark.asyncio
    async def test_analyze_with_context(self, analyzer):
        """Context-aware analysis returns a result carrying ``context_adjusted``."""
        text = "삼성전자 주가가 하락했지만 장기적으로는 긍정적입니다."
        context = {
            "company": "삼성전자",
            "sector": "반도체",
            "previous_sentiment": "positive",
        }
        result = await analyzer.analyze_with_context(text, context)

        assert isinstance(result, SentimentResult)
        assert hasattr(result, 'context_adjusted')

    @pytest.mark.asyncio
    async def test_time_weighted_sentiment(self, analyzer):
        """Time-weighted analysis returns a result carrying ``weighted_score``."""
        news_items = [
            {
                "content": "주가 급등",
                "published_at": datetime.now(timezone.utc),
                "weight": 1.0,
            },
            {
                "content": "주가 하락",
                "published_at": datetime.now(timezone.utc),
                "weight": 0.5,
            },
        ]
        result = await analyzer.analyze_time_weighted(news_items)

        assert isinstance(result, SentimentResult)
        assert hasattr(result, 'weighted_score')

    @pytest.mark.asyncio
    async def test_sentiment_trends(self, analyzer):
        """Trend analysis reports direction, strength, and volatility."""
        historical_sentiments = [
            {"date": "2024-01-01", "sentiment": "positive", "score": 0.7},
            {"date": "2024-01-02", "sentiment": "positive", "score": 0.8},
            {"date": "2024-01-03", "sentiment": "negative", "score": 0.3},
            {"date": "2024-01-04", "sentiment": "neutral", "score": 0.5},
        ]
        trend = await analyzer.analyze_trend(historical_sentiments)

        assert "direction" in trend  # "improving", "declining", "stable"
        assert "strength" in trend
        assert "volatility" in trend

    @pytest.mark.asyncio
    async def test_domain_specific_lexicon(self, analyzer):
        """Stock-market lexicon terms can be analyzed.

        Polarity is not asserted here (bullish terms should lean positive and
        bearish terms negative); the test only checks that each sentence
        produces a SentimentResult so it cannot pass vacuously.
        """
        bullish_terms = ["강세", "상승세", "돌파", "신고가", "매수우위"]
        bearish_terms = ["약세", "하락세", "붕괴", "신저가", "매도우위"]

        for term in bullish_terms:
            result = await analyzer.analyze(f"시장이 {term}를 보이고 있습니다.")
            assert isinstance(result, SentimentResult), f"bullish term {term!r}"

        for term in bearish_terms:
            result = await analyzer.analyze(f"시장이 {term}를 보이고 있습니다.")
            assert isinstance(result, SentimentResult), f"bearish term {term!r}"

    @pytest.mark.asyncio
    async def test_sentiment_aggregation(self, analyzer):
        """Aggregating several results yields one result with a valid label."""
        sentiments = [
            SentimentResult("positive", 0.8, 0.9),
            SentimentResult("positive", 0.7, 0.8),
            SentimentResult("neutral", 0.5, 0.6),
            SentimentResult("negative", 0.3, 0.7),
        ]
        aggregated = await analyzer.aggregate_sentiments(sentiments)

        assert isinstance(aggregated, SentimentResult)
        # Aggregation may additionally report a "mixed" label.
        assert aggregated.sentiment in self.VALID_SENTIMENTS + ("mixed",)

    @pytest.mark.asyncio
    async def test_confidence_calculation(self, analyzer):
        """Clear sentiment gets higher confidence than mixed sentiment."""
        clear_positive = "매우 좋습니다. 훌륭합니다. 최고입니다."
        mixed = "좋기도 하고 나쁘기도 합니다."

        clear_result = await analyzer.analyze(clear_positive)
        mixed_result = await analyzer.analyze(mixed)

        assert clear_result.confidence > mixed_result.confidence

    @pytest.mark.asyncio
    async def test_language_detection(self, analyzer):
        """Results for Korean, English, and mixed text expose ``language``."""
        texts = {
            "korean": "삼성전자 주가가 상승했습니다.",
            "english": "Samsung stock price increased.",
            "mixed": "Samsung 주가가 up했습니다.",
        }
        for lang, text in texts.items():
            result = await analyzer.analyze(text)
            assert hasattr(result, 'language'), f"no language attribute for {lang} text"

    @pytest.mark.asyncio
    async def test_sarcasm_detection(self, analyzer):
        """Results expose a ``sarcasm_detected`` attribute."""
        sarcastic_text = "아주 좋네요, 주가가 반토막 났는데"
        result = await analyzer.analyze(sarcastic_text)

        # Only the presence of the flag is checked, not its value.
        assert hasattr(result, 'sarcasm_detected')

    @pytest.mark.asyncio
    async def test_emoji_sentiment(self, analyzer):
        """Text containing emojis is analyzed without error."""
        texts_with_emoji = [
            "주가 상승 📈 😊",
            "주가 하락 📉 😢",
            "보합세 😐",
        ]
        for text in texts_with_emoji:
            result = await analyzer.analyze(text)
            assert isinstance(result, SentimentResult), f"input {text!r}"

    @pytest.mark.asyncio
    async def test_aspect_based_sentiment(self, analyzer):
        """Aspect analysis finds each aspect and labels it with a valid sentiment."""
        text = "삼성전자의 실적은 좋았지만 전망은 불확실합니다. 경영진은 훌륭합니다."
        aspects = await analyzer.analyze_aspects(text)

        assert "실적" in aspects
        assert "전망" in aspects
        assert "경영진" in aspects

        # Each aspect should have its own sentiment.
        for aspect, sentiment in aspects.items():
            assert sentiment in self.VALID_SENTIMENTS, (
                f"aspect {aspect!r} has unexpected sentiment {sentiment!r}"
            )

    @pytest.mark.asyncio
    async def test_realtime_sentiment_update(self, analyzer):
        """Updating a result with new text keeps the score a float in [0, 1]."""
        initial_text = "시장 전망이 긍정적입니다."
        initial_result = await analyzer.analyze(initial_text)

        update_text = "하지만 새로운 리스크가 발견되었습니다."
        updated_result = await analyzer.update_sentiment(initial_result, update_text)

        # The score may be unchanged (e.g. due to averaging), so only its
        # type and range are checked.
        assert isinstance(updated_result.score, float)
        assert 0 <= updated_result.score <= 1

    @pytest.mark.asyncio
    async def test_sentiment_explanation(self, analyzer):
        """Explained analysis exposes positive/negative factors and key phrases."""
        text = "주가가 상승했지만 거래량이 적어 우려됩니다."
        result = await analyzer.analyze_with_explanation(text)

        assert hasattr(result, 'explanation')
        assert 'positive_factors' in result.explanation
        assert 'negative_factors' in result.explanation
        assert 'key_phrases' in result.explanation

    def test_custom_sentiment_rules(self, analyzer):
        """Custom rules registered on the analyzer drive ``apply_custom_rules``."""
        analyzer.add_custom_rule("상한가", sentiment="very_positive", score=0.95)
        analyzer.add_custom_rule("하한가", sentiment="very_negative", score=0.05)

        upper_limit = analyzer.apply_custom_rules("주식이 상한가를 기록했습니다.")
        lower_limit = analyzer.apply_custom_rules("주식이 하한가를 기록했습니다.")

        assert upper_limit['score'] > 0.9
        assert lower_limit['score'] < 0.1

    @pytest.mark.asyncio
    async def test_comparative_sentiment(self, analyzer):
        """Comparative analysis returns entities and (possibly empty) comparisons."""
        text = "삼성전자는 LG전자보다 실적이 좋았습니다."
        result = await analyzer.analyze_comparative(text)

        assert 'entities' in result
        assert 'comparisons' in result

        # Finding no comparison is acceptable; when one is found, it must
        # name both compared entities.
        comparisons = result['comparisons']
        if comparisons:
            assert 'entity1' in comparisons[0]
            assert 'entity2' in comparisons[0]

    @pytest.mark.asyncio
    async def test_sentiment_persistence(self, analyzer, tmp_path):
        """A saved result round-trips through ``save_result``/``load_result``."""
        result = SentimentResult(
            sentiment="positive",
            score=0.8,
            confidence=0.9,
            keywords=["상승", "호재"],
        )

        save_path = tmp_path / "sentiment_result.json"
        await analyzer.save_result(result, save_path)
        loaded_result = await analyzer.load_result(save_path)

        assert loaded_result.sentiment == result.sentiment
        assert loaded_result.score == result.score

    @pytest.mark.asyncio
    async def test_batch_processing_performance(self, analyzer):
        """A batch of 100 texts is processed in under 10 seconds."""
        import time

        large_batch = ["테스트 텍스트"] * 100

        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments during the run.
        started = time.perf_counter()
        results = await analyzer.analyze_batch(large_batch)
        elapsed = time.perf_counter() - started

        assert len(results) == 100
        assert elapsed < 10, f"batch took {elapsed:.2f}s"

    @pytest.mark.asyncio
    async def test_error_handling(self, analyzer):
        """Invalid input raises SentimentError; Japanese text maps to neutral."""
        # A dict is not valid input for analyze().
        with pytest.raises(SentimentError):
            await analyzer.analyze({"invalid": "input"})

        # Unsupported language should default to neutral rather than raise.
        result = await analyzer.analyze("これは日本語のテキストです。")
        assert result.sentiment == "neutral"

    @pytest.mark.asyncio
    async def test_sentiment_caching(self, analyzer):
        """Analyzing the same text twice gives the same sentiment and score.

        NOTE(review): this only checks result stability; it does not verify
        that the second call is actually served from a cache.
        """
        text = "삼성전자 주가가 상승했습니다."

        first = await analyzer.analyze(text)
        second = await analyzer.analyze(text)

        assert first.sentiment == second.sentiment
        assert first.score == second.score

    def test_sentiment_thresholds(self, analyzer):
        """Thresholds are readable and can be adjusted via ``set_thresholds``."""
        thresholds = analyzer.get_thresholds()
        for label in ('positive', 'negative', 'neutral'):
            assert label in thresholds, f"missing threshold {label!r}"

        analyzer.set_thresholds(positive=0.7, negative=0.3)
        new_thresholds = analyzer.get_thresholds()

        assert new_thresholds['positive'] == 0.7
        assert new_thresholds['negative'] == 0.3

    @pytest.mark.asyncio
    async def test_multilingual_sentiment(self, analyzer):
        """Korean, English, and mixed-language text all get a valid label."""
        texts = {
            "korean": "주가가 급등했습니다.",
            "english": "Stock price surged.",
            "mixed": "Stock이 급등했어요.",
        }
        for lang, text in texts.items():
            result = await analyzer.analyze(text)
            assert result.sentiment in self.VALID_SENTIMENTS, (
                f"unexpected sentiment {result.sentiment!r} for {lang} text"
            )

    @pytest.mark.asyncio
    async def test_sentiment_api_integration(self, analyzer):
        """``analyze_with_external_api`` returns the expected fixed result.

        NOTE(review): nothing is mocked in this test, so the asserted values
        presumably come from a stubbed implementation inside the analyzer —
        confirm against ``analyze_with_external_api``.
        """
        result = await analyzer.analyze_with_external_api("테스트 텍스트")

        assert result.sentiment == "positive"
        assert result.score == 0.85