Skip to main content
Glama

MCP Market Statistics Server

by whdghk1907
test_sentiment_analyzer.py (16.5 kB)
"""감정 분석기 테스트""" import pytest import asyncio import json import time from datetime import datetime, timedelta from unittest.mock import AsyncMock, MagicMock, patch from typing import Dict, List, Any from src.ai.sentiment_analyzer import SentimentAnalyzer from src.exceptions import ModelNotTrainedError, InsufficientDataError, PredictionError class TestSentimentAnalyzer: """감정 분석기 테스트""" @pytest.fixture def analyzer_config(self): """분석기 설정""" return { "model_name": "klue/bert-base", "max_length": 512, "batch_size": 16, "confidence_threshold": 0.7, "sentiment_labels": ["negative", "neutral", "positive"], "language": "ko" } @pytest.fixture def sentiment_analyzer(self, analyzer_config): """감정 분석기 인스턴스""" return SentimentAnalyzer(analyzer_config) @pytest.fixture def sample_news_data(self): """샘플 뉴스 데이터""" return [ { "title": "삼성전자 3분기 실적 예상치 상회, 주가 상승 기대", "content": "삼성전자가 3분기 실적에서 시장 예상치를 크게 상회하며 주가 상승 모멘텀을 확보했다.", "timestamp": "2024-01-15T10:30:00", "source": "경제신문", "symbols": ["005930"] }, { "title": "반도체 업계 전망 부정적, 메모리 가격 하락 우려", "content": "글로벌 반도체 수요 둔화로 메모리 반도체 가격 하락이 예상된다는 분석이 나왔다.", "timestamp": "2024-01-15T14:20:00", "source": "IT뉴스", "symbols": ["005930", "000660"] }, { "title": "코스피 지수 2500선 회복, 외국인 순매수 지속", "content": "외국인 투자자들의 순매수세가 이어지면서 코스피가 2500선을 회복했다.", "timestamp": "2024-01-15T16:00:00", "source": "증권뉴스", "symbols": ["KOSPI"] } ] def test_analyzer_initialization(self, sentiment_analyzer, analyzer_config): """분석기 초기화 테스트""" assert sentiment_analyzer.model_name == analyzer_config["model_name"] assert sentiment_analyzer.max_length == analyzer_config["max_length"] assert sentiment_analyzer.batch_size == analyzer_config["batch_size"] assert sentiment_analyzer.confidence_threshold == analyzer_config["confidence_threshold"] assert sentiment_analyzer.sentiment_labels == analyzer_config["sentiment_labels"] assert sentiment_analyzer.language == analyzer_config["language"] assert sentiment_analyzer.is_initialized == False @pytest.mark.asyncio async def 
test_model_initialization(self, sentiment_analyzer): """모델 초기화 테스트""" # 초기화 전 상태 assert sentiment_analyzer.is_initialized == False # 모델 초기화 success = await sentiment_analyzer.initialize_model() # 초기화 후 상태 assert success == True assert sentiment_analyzer.is_initialized == True @pytest.mark.asyncio async def test_single_text_analysis(self, sentiment_analyzer): """단일 텍스트 감정 분석 테스트""" await sentiment_analyzer.initialize_model() positive_text = "삼성전자 주가가 크게 상승하며 투자자들의 기대감이 높아지고 있다." result = await sentiment_analyzer.analyze_text(positive_text) # 결과 구조 확인 assert 'sentiment' in result assert 'confidence' in result assert 'scores' in result assert 'processing_time' in result # 감정 라벨 확인 assert result['sentiment'] in sentiment_analyzer.sentiment_labels # 신뢰도 확인 assert 0 <= result['confidence'] <= 1 # 점수 확인 scores = result['scores'] assert len(scores) == len(sentiment_analyzer.sentiment_labels) assert abs(sum(scores.values()) - 1.0) < 0.01 # 합이 1에 가까움 @pytest.mark.asyncio async def test_batch_text_analysis(self, sentiment_analyzer, sample_news_data): """배치 텍스트 감정 분석 테스트""" await sentiment_analyzer.initialize_model() texts = [news['title'] + ' ' + news['content'] for news in sample_news_data] results = await sentiment_analyzer.analyze_batch(texts) # 결과 개수 확인 assert len(results) == len(texts) # 각 결과 확인 for result in results: assert 'sentiment' in result assert 'confidence' in result assert 'scores' in result assert result['sentiment'] in sentiment_analyzer.sentiment_labels @pytest.mark.asyncio async def test_news_sentiment_analysis(self, sentiment_analyzer, sample_news_data): """뉴스 감정 분석 테스트""" await sentiment_analyzer.initialize_model() results = await sentiment_analyzer.analyze_news(sample_news_data) # 결과 구조 확인 assert len(results) == len(sample_news_data) for i, result in enumerate(results): assert 'news_id' in result assert 'sentiment' in result assert 'confidence' in result assert 'symbols' in result assert 'timestamp' in result assert 'source' in result assert 
'title_sentiment' in result assert 'content_sentiment' in result # 원본 데이터와 매칭 확인 assert result['symbols'] == sample_news_data[i]['symbols'] assert result['timestamp'] == sample_news_data[i]['timestamp'] @pytest.mark.asyncio async def test_symbol_sentiment_aggregation(self, sentiment_analyzer, sample_news_data): """심볼별 감정 집계 테스트""" await sentiment_analyzer.initialize_model() news_results = await sentiment_analyzer.analyze_news(sample_news_data) symbol_sentiments = await sentiment_analyzer.aggregate_sentiment_by_symbol(news_results) # 집계 결과 확인 assert '005930' in symbol_sentiments assert 'KOSPI' in symbol_sentiments # 집계 데이터 구조 확인 for symbol, sentiment_data in symbol_sentiments.items(): assert 'overall_sentiment' in sentiment_data assert 'sentiment_score' in sentiment_data assert 'news_count' in sentiment_data assert 'confidence_avg' in sentiment_data assert 'sentiment_distribution' in sentiment_data assert 'trend_analysis' in sentiment_data # 점수 범위 확인 assert -1 <= sentiment_data['sentiment_score'] <= 1 assert sentiment_data['news_count'] > 0 @pytest.mark.asyncio async def test_time_series_sentiment(self, sentiment_analyzer, sample_news_data): """시계열 감정 분석 테스트""" await sentiment_analyzer.initialize_model() # 시간대별 뉴스 데이터 생성 extended_news = [] for i in range(24): # 24시간 for news in sample_news_data: news_copy = news.copy() news_copy['timestamp'] = f"2024-01-15T{i:02d}:00:00" extended_news.append(news_copy) news_results = await sentiment_analyzer.analyze_news(extended_news) time_series = await sentiment_analyzer.get_sentiment_time_series(news_results, interval='1H') # 시계열 데이터 확인 assert len(time_series) > 0 for timestamp, sentiment_data in time_series.items(): assert 'sentiment_score' in sentiment_data assert 'news_count' in sentiment_data assert 'dominant_sentiment' in sentiment_data assert 'volatility' in sentiment_data @pytest.mark.asyncio async def test_sentiment_impact_analysis(self, sentiment_analyzer, sample_news_data): """감정 영향도 분석 테스트""" await 
sentiment_analyzer.initialize_model() # 가격 데이터 (감정 분석 결과와 연동) price_data = { '005930': { '2024-01-15T09:00:00': 75000, '2024-01-15T15:00:00': 76500, '2024-01-15T16:30:00': 77200 } } news_results = await sentiment_analyzer.analyze_news(sample_news_data) impact_analysis = await sentiment_analyzer.analyze_sentiment_impact(news_results, price_data) # 영향도 분석 결과 확인 assert 'correlation_analysis' in impact_analysis assert 'impact_scores' in impact_analysis assert 'significant_events' in impact_analysis assert 'lag_analysis' in impact_analysis # 상관관계 분석 확인 correlation = impact_analysis['correlation_analysis'] assert 'sentiment_price_correlation' in correlation assert -1 <= correlation['sentiment_price_correlation'] <= 1 @pytest.mark.asyncio async def test_sentiment_preprocessing(self, sentiment_analyzer): """텍스트 전처리 테스트""" await sentiment_analyzer.initialize_model() raw_text = " 삼성전자!!! 주가가 @#$% 상승했습니다... " # 텍스트 정제 cleaned = sentiment_analyzer._clean_text(raw_text) assert cleaned.strip() == cleaned assert not any(char in cleaned for char in ['@', '#', '$', '%']) # 토큰화 tokens = sentiment_analyzer._tokenize(cleaned) assert isinstance(tokens, (list, dict)) # 특수 토큰 처리 processed = sentiment_analyzer._prepare_input(cleaned) assert isinstance(processed, dict) assert 'input_ids' in processed or 'tokens' in processed @pytest.mark.asyncio async def test_confidence_filtering(self, sentiment_analyzer): """신뢰도 필터링 테스트""" await sentiment_analyzer.initialize_model() # 다양한 신뢰도의 결과 시뮬레이션 mock_results = [ {'sentiment': 'positive', 'confidence': 0.9}, {'sentiment': 'negative', 'confidence': 0.5}, {'sentiment': 'neutral', 'confidence': 0.8} ] filtered_results = await sentiment_analyzer.filter_by_confidence( mock_results, threshold=0.7 ) # 신뢰도 임계값 이상만 남아있는지 확인 assert len(filtered_results) == 2 for result in filtered_results: assert result['confidence'] >= 0.7 @pytest.mark.asyncio async def test_sentiment_trend_analysis(self, sentiment_analyzer, sample_news_data): """감정 트렌드 분석 테스트""" await 
sentiment_analyzer.initialize_model() # 시간순 뉴스 데이터 time_ordered_news = [] for i in range(10): for news in sample_news_data: news_copy = news.copy() news_copy['timestamp'] = f"2024-01-{15+i:02d}T10:00:00" time_ordered_news.append(news_copy) news_results = await sentiment_analyzer.analyze_news(time_ordered_news) trend_analysis = await sentiment_analyzer.analyze_sentiment_trends(news_results) # 트렌드 분석 결과 확인 assert 'trend_direction' in trend_analysis assert 'trend_strength' in trend_analysis assert 'trend_acceleration' in trend_analysis assert 'turning_points' in trend_analysis assert trend_analysis['trend_direction'] in ['up', 'down', 'stable'] assert 0 <= trend_analysis['trend_strength'] <= 1 @pytest.mark.asyncio async def test_multilingual_support(self, sentiment_analyzer): """다국어 지원 테스트""" # 영어 설정으로 분석기 재구성 english_config = { "model_name": "bert-base-uncased", "language": "en", "sentiment_labels": ["negative", "neutral", "positive"] } en_analyzer = SentimentAnalyzer(english_config) await en_analyzer.initialize_model() english_text = "Samsung Electronics stock price is rising strongly today." 
result = await en_analyzer.analyze_text(english_text) # 영어 분석 결과 확인 assert 'sentiment' in result assert result['sentiment'] in en_analyzer.sentiment_labels @pytest.mark.asyncio async def test_model_caching(self, sentiment_analyzer): """모델 캐싱 테스트""" # 첫 번째 초기화 await sentiment_analyzer.initialize_model() first_init_time = time.time() # 두 번째 초기화 (캐시 사용) await sentiment_analyzer.initialize_model() second_init_time = time.time() # 캐시 사용으로 빨라졌는지 확인 assert sentiment_analyzer.is_initialized == True @pytest.mark.asyncio async def test_error_handling(self, sentiment_analyzer): """오류 처리 테스트""" # 초기화 없이 분석 시도 with pytest.raises(ModelNotTrainedError): await sentiment_analyzer.analyze_text("test text") # 빈 텍스트 분석 await sentiment_analyzer.initialize_model() result = await sentiment_analyzer.analyze_text("") assert 'sentiment' in result assert result['sentiment'] == 'neutral' # 기본값 # 너무 긴 텍스트 처리 long_text = "긴 텍스트 " * 1000 result = await sentiment_analyzer.analyze_text(long_text) assert 'sentiment' in result # 잘려서 처리됨 @pytest.mark.asyncio async def test_performance_metrics(self, sentiment_analyzer, sample_news_data): """성능 메트릭 테스트""" await sentiment_analyzer.initialize_model() start_time = time.time() results = await sentiment_analyzer.analyze_news(sample_news_data) end_time = time.time() processing_time = end_time - start_time # 성능 메트릭 확인 metrics = sentiment_analyzer.get_performance_metrics() assert 'total_texts_processed' in metrics assert 'average_processing_time' in metrics assert 'cache_hit_rate' in metrics # 처리 시간이 합리적인지 확인 assert processing_time < 10.0 # 10초 이내 def test_sentiment_scoring(self, sentiment_analyzer): """감정 점수 계산 테스트""" # 감정 레이블을 점수로 변환 positive_score = sentiment_analyzer._sentiment_to_score("positive") neutral_score = sentiment_analyzer._sentiment_to_score("neutral") negative_score = sentiment_analyzer._sentiment_to_score("negative") assert positive_score > neutral_score > negative_score assert -1 <= negative_score <= 1 assert -1 <= neutral_score <= 1 assert 
-1 <= positive_score <= 1 @pytest.mark.asyncio async def test_batch_processing_optimization(self, sentiment_analyzer): """배치 처리 최적화 테스트""" await sentiment_analyzer.initialize_model() # 큰 배치 처리 large_batch = ["테스트 텍스트 " + str(i) for i in range(100)] start_time = time.time() results = await sentiment_analyzer.analyze_batch(large_batch) end_time = time.time() # 결과 확인 assert len(results) == len(large_batch) # 배치 처리가 개별 처리보다 빠른지 확인 batch_time_per_item = (end_time - start_time) / len(large_batch) assert batch_time_per_item < 0.1 # 아이템당 0.1초 이내

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/whdghk1907/mcp-market-statistics'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.