#!/usr/bin/env python3
"""
Unit tests for enhanced AnalyzerService methods
Tests the 5 new methods with synthetic data to ensure they work correctly.
"""
from ytpipe.core.models import VideoMetadata, Chunk
from ytpipe.services.intelligence.analyzer import AnalyzerService
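
# The five enhanced methods exercised below: generate_summary, extract_entities,
# analyze_sentiment, calculate_difficulty, and extract_action_items.
# Run this file directly (it has its own main()), or collect the test_* functions
# with pytest -- assuming the filename follows pytest's test_*.py convention.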


def create_test_metadata():
    """Create sample metadata for testing."""
    return VideoMetadata(
        video_id="test12345ab",
        url="https://youtube.com/watch?v=test12345ab",
        title="Introduction to FastAPI - Complete Tutorial",
        duration=1200,  # 20 minutes
        upload_date="20240101",
        view_count=50000,
        like_count=1500,
        channel="Tech Tutorials",
        description="Learn FastAPI from scratch"
    )
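
# Note on the metadata above: duration appears to be in seconds
# (1200 s = 20 minutes) and upload_date a YYYYMMDD string.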


def create_test_chunks():
    """Create sample chunks for testing."""
    chunks = [
        Chunk(
            id=0,
            text="Welcome to this FastAPI tutorial. Today we will learn how to build a REST API with Python.",
            word_count=17,
            start_char=0,
            end_char=91,
            quality_score=8.5,
            timestamp_start="0:00",
            timestamp_end="0:15"
        ),
        Chunk(
            id=1,
            text="First, install FastAPI using pip install fastapi. You should also install uvicorn as the ASGI server.",
            word_count=16,
            start_char=92,
            end_char=196,
            quality_score=9.0,
            timestamp_start="0:15",
            timestamp_end="0:30"
        ),
        Chunk(
            id=2,
            text="FastAPI is a modern, fast web framework for building APIs with Python. It's excellent for production use.",
            word_count=17,
            start_char=197,
            end_char=304,
            quality_score=8.8,
            timestamp_start="0:30",
            timestamp_end="0:45"
        ),
        Chunk(
            id=3,
            text="Dr Johnson from Stanford University recommends using Pydantic models for data validation.",
            word_count=12,
            start_char=305,
            end_char=395,
            quality_score=7.5,
            timestamp_start="0:45",
            timestamp_end="1:00"
        ),
        Chunk(
            id=4,
            text="This is difficult to understand at first, but the implementation is poor without proper testing.",
            word_count=15,
            start_char=396,
            end_char=493,
            quality_score=6.0,
            timestamp_start="1:00",
            timestamp_end="1:15"
        )
    ]
    return chunks
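
# The chunk texts above are deliberately varied so each analyzer method has
# something to find: proper nouns (FastAPI, Python, Dr Johnson, Stanford
# University) for entity extraction, positive wording ("excellent", "modern",
# "fast") and negative wording ("difficult", "poor") for sentiment analysis,
# and imperative "install ..." instructions for action-item extraction.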


def test_generate_summary():
    """Test summary generation."""
    print("\n" + "="*60)
    print("TEST: generate_summary()")
    print("="*60)
    analyzer = AnalyzerService()
    metadata = create_test_metadata()
    chunks = create_test_chunks()
    summary = analyzer.generate_summary(metadata, chunks, max_bullets=5)
    print(f"Generated {len(summary)} bullet points:")
    for i, bullet in enumerate(summary, 1):
        print(f" {i}. {bullet[:80]}...")
    # Assertions
    assert len(summary) >= 3, "Should generate at least 3 bullets"
    assert len(summary) <= 5, "Should not exceed max_bullets"
    assert all(isinstance(s, str) for s in summary), "All bullets should be strings"
    print("✅ PASSED")
    return summary


def test_extract_entities():
    """Test entity extraction."""
    print("\n" + "="*60)
    print("TEST: extract_entities()")
    print("="*60)
    analyzer = AnalyzerService()
    chunks = create_test_chunks()
    entities = analyzer.extract_entities(chunks, max_entities=10)
    print(f"Extracted {len(entities)} entities:")
    for entity in entities:
        print(f" - {entity['entity']:20s} [{entity['type']:8s}] count={entity['count']}")
    # Assertions
    assert isinstance(entities, list), "Should return a list"
    assert len(entities) <= 10, "Should not exceed max_entities"
    # Check entity structure
    for entity in entities:
        assert 'entity' in entity, "Entity should have 'entity' key"
        assert 'type' in entity, "Entity should have 'type' key"
        assert 'count' in entity, "Entity should have 'count' key"
        assert entity['type'] in ['person', 'org', 'concept'], "Type should be valid"
    # Check for expected entities
    entity_names = [e['entity'] for e in entities]
    assert 'FastAPI' in entity_names, "Should detect FastAPI"
    assert 'Python' in entity_names, "Should detect Python"
    print("✅ PASSED")
    return entities


def test_analyze_sentiment():
    """Test sentiment analysis."""
    print("\n" + "="*60)
    print("TEST: analyze_sentiment()")
    print("="*60)
    analyzer = AnalyzerService()
    chunks = create_test_chunks()
    sentiment = analyzer.analyze_sentiment(chunks)
    print(f"Sentiment: {sentiment['sentiment']}")
    print(f"Score: {sentiment['score']}")
    print(f"Distribution: {sentiment['distribution']}")
    # Assertions
    assert 'sentiment' in sentiment, "Should have sentiment key"
    assert 'score' in sentiment, "Should have score key"
    assert 'distribution' in sentiment, "Should have distribution key"
    assert sentiment['sentiment'] in ['positive', 'neutral', 'negative'], "Valid sentiment"
    assert 0 <= sentiment['score'] <= 1, "Score should be 0-1"
    assert 'positive' in sentiment['distribution'], "Should have positive count"
    assert 'negative' in sentiment['distribution'], "Should have negative count"
    assert 'neutral' in sentiment['distribution'], "Should have neutral count"
    # Our test data has both positive (excellent, modern, fast) and negative (difficult, poor)
    # So we expect some of each
    assert sentiment['distribution']['positive'] > 0, "Should detect positive words"
    assert sentiment['distribution']['negative'] > 0, "Should detect negative words"
    print("✅ PASSED")
    return sentiment


def test_calculate_difficulty():
    """Test difficulty calculation."""
    print("\n" + "="*60)
    print("TEST: calculate_difficulty()")
    print("="*60)
    analyzer = AnalyzerService()
    chunks = create_test_chunks()
    difficulty = analyzer.calculate_difficulty(chunks)
    print(f"Difficulty Level: {difficulty['level']}")
    print(f"Difficulty Score: {difficulty['score']}")
    print("Factors:")
    for key, value in difficulty['factors'].items():
        print(f" {key}: {value}")
    # Assertions
    assert 'level' in difficulty, "Should have level key"
    assert 'score' in difficulty, "Should have score key"
    assert 'factors' in difficulty, "Should have factors key"
    valid_levels = ['beginner', 'intermediate', 'advanced', 'expert']
    assert difficulty['level'] in valid_levels, f"Level should be one of {valid_levels}"
    assert 0 <= difficulty['score'] <= 1, "Score should be 0-1"
    # Check factors
    factors = difficulty['factors']
    assert 'avg_word_length' in factors, "Should have avg_word_length"
    assert 'vocab_complexity' in factors, "Should have vocab_complexity"
    assert 'avg_sentence_length' in factors, "Should have avg_sentence_length"
    assert 'technical_density' in factors, "Should have technical_density"
    # Technical content should have some technical density
    assert factors['technical_density'] > 0, "Technical content should have technical words"
    print("✅ PASSED")
    return difficulty


def test_extract_action_items():
    """Test action item extraction."""
    print("\n" + "="*60)
    print("TEST: extract_action_items()")
    print("="*60)
    analyzer = AnalyzerService()
    chunks = create_test_chunks()
    action_items = analyzer.extract_action_items(chunks, max_items=5)
    print(f"Extracted {len(action_items)} action items:")
    for i, item in enumerate(action_items, 1):
        print(f" {i}. {item[:80]}...")
    # Assertions
    assert isinstance(action_items, list), "Should return a list"
    assert len(action_items) <= 5, "Should not exceed max_items"
    assert all(isinstance(item, str) for item in action_items), "All items should be strings"
    # Our test data has "install FastAPI" and "you should also install"
    # Should detect at least one action item
    assert len(action_items) > 0, "Should detect action items in test data"
    # Check if known action detected
    action_text = " ".join(action_items).lower()
    assert 'install' in action_text, "Should detect 'install' action"
    print("✅ PASSED")
    return action_items


def test_empty_chunks():
    """Test all methods handle empty chunks gracefully."""
    print("\n" + "="*60)
    print("TEST: Empty chunks handling")
    print("="*60)
    analyzer = AnalyzerService()
    metadata = create_test_metadata()
    empty_chunks = []
    # All methods should handle empty input gracefully
    summary = analyzer.generate_summary(metadata, empty_chunks)
    assert len(summary) > 0, "Should return metadata fallback"
    print(" ✓ generate_summary handles empty chunks")
    entities = analyzer.extract_entities(empty_chunks)
    assert entities == [], "Should return empty list"
    print(" ✓ extract_entities handles empty chunks")
    sentiment = analyzer.analyze_sentiment(empty_chunks)
    assert sentiment['sentiment'] == 'neutral', "Should return neutral for empty"
    print(" ✓ analyze_sentiment handles empty chunks")
    difficulty = analyzer.calculate_difficulty(empty_chunks)
    assert difficulty['level'] == 'beginner', "Should return beginner for empty"
    print(" ✓ calculate_difficulty handles empty chunks")
    actions = analyzer.extract_action_items(empty_chunks)
    assert actions == [], "Should return empty list"
    print(" ✓ extract_action_items handles empty chunks")
    print("✅ PASSED")


def main():
    """Run all unit tests."""
    print("="*60)
    print("ENHANCED ANALYZER UNIT TESTS")
    print("="*60)
    try:
        # Test each method
        test_generate_summary()
        test_extract_entities()
        test_analyze_sentiment()
        test_calculate_difficulty()
        test_extract_action_items()
        # Test edge cases
        test_empty_chunks()
        # Summary
        print("\n" + "="*60)
        print("ALL TESTS PASSED ✅")
        print("="*60)
        print("\nEnhanced analyzer methods are working correctly!")
        print("Ready for integration with the lab dashboard.")
    except AssertionError as e:
        print(f"\n❌ TEST FAILED: {e}")
        raise
    except Exception as e:
        print(f"\n❌ ERROR: {e}")
        raise


if __name__ == "__main__":
    main()