#!/usr/bin/env python3
"""
Test script for Lab Dashboard Service
Demonstrates the clinical lab-style dashboard generation with sample data.
"""
from pathlib import Path
from datetime import datetime
from ytpipe.core.models import VideoMetadata, Chunk, AnalysisReport
from ytpipe.services.exporters import LabDashboardService
def create_sample_data():
    """Build a consistent (metadata, chunks, analysis) triple for testing.

    The 15 sample chunks follow a regular layout — sequential ids, 180-char
    spans, and 15-second timestamps — so those fields are derived rather than
    hand-written. The AnalysisReport's chunk statistics (total words, average
    quality, quality buckets) are likewise computed from the chunks so the
    report can never drift out of sync with the sample data (the previous
    hard-coded total_words=395 did not match the actual sum of 385).

    Returns:
        tuple: (VideoMetadata, list[Chunk], AnalysisReport)
    """
    # Sample metadata. NOTE(review): chunks_count/total_words here describe
    # the "full" fictional video; the chunk list below is only a sample slice.
    metadata = VideoMetadata(
        video_id="dQw4w9WgXcQ",
        url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        title="Advanced Machine Learning Techniques: Neural Networks and Deep Learning",
        duration=1847,  # ~30 minutes
        upload_date="20240115",
        view_count=125000,
        like_count=8500,
        comment_count=342,
        channel="AI Research Institute",
        description="Comprehensive overview of modern ML techniques...",
        processed_at=datetime.now(),
        chunks_count=42,
        total_words=8234,
        transcription_method="whisper-large-v3",
    )

    # (text, word_count, quality_score) per chunk; positional fields
    # (id, char offsets, timestamps) are derived from the index below.
    chunk_specs = [
        ("Welcome to this comprehensive tutorial on neural networks. Today we'll explore the fundamental concepts that power modern AI systems, including backpropagation, gradient descent, and activation functions.", 28, 8.5),
        ("Neural networks are inspired by biological neurons in the human brain. Each artificial neuron receives inputs, applies weights, and produces an output through an activation function.", 26, 9.2),
        ("The architecture of a neural network consists of layers: input layer, hidden layers, and output layer. The hidden layers extract increasingly abstract features from the data.", 28, 8.8),
        ("Training involves forward propagation to make predictions and backward propagation to update weights. This iterative process minimizes the loss function using optimization algorithms.", 25, 9.0),
        ("Common activation functions include ReLU, sigmoid, and tanh. Each has different properties that make them suitable for specific use cases in neural network design.", 26, 7.8),
        ("Deep learning extends neural networks with many hidden layers, enabling the model to learn hierarchical representations. This is crucial for complex tasks like image recognition.", 27, 8.6),
        ("Convolutional neural networks (CNNs) are specialized for processing grid-like data such as images. They use convolutional layers to detect spatial patterns and features.", 25, 8.4),
        ("Recurrent neural networks (RNNs) handle sequential data by maintaining hidden states. This makes them ideal for time series analysis, natural language processing, and speech recognition.", 27, 8.9),
        ("Long short-term memory (LSTM) networks solve the vanishing gradient problem in RNNs by using gating mechanisms. This allows them to capture long-term dependencies effectively.", 26, 9.1),
        ("Transformers revolutionized NLP with self-attention mechanisms. They can process entire sequences in parallel, making training much faster than RNNs while achieving better performance.", 25, 9.3),
        ("Regularization techniques like dropout and batch normalization prevent overfitting. They help models generalize better to unseen data by reducing reliance on specific features.", 25, 8.2),
        ("Learning rate scheduling adjusts the step size during training. Starting with larger rates and gradually decreasing helps converge to better optima more efficiently.", 24, 7.9),
        ("Data augmentation artificially expands training datasets by applying transformations. This is particularly effective in computer vision tasks where labeled data is scarce.", 24, 8.3),
        ("Transfer learning leverages pre-trained models on new tasks. Fine-tuning saves computational resources and often achieves better results than training from scratch.", 23, 8.7),
        ("Evaluation metrics vary by task: accuracy for classification, mean squared error for regression, and F1 score for imbalanced datasets. Choosing appropriate metrics is critical.", 26, 8.1),
    ]

    def _ts(seconds):
        """Format a second count as M:SS to match the dashboard timestamp style."""
        return f"{seconds // 60}:{seconds % 60:02d}"

    # Each chunk covers a 180-char span and a 15-second window.
    chunks = [
        Chunk(
            id=i,
            text=text,
            word_count=word_count,
            start_char=0 if i == 0 else 180 * i + 1,
            end_char=180 * (i + 1),
            quality_score=quality,
            timestamp_start=_ts(i * 15),
            timestamp_end=_ts((i + 1) * 15),
        )
        for i, (text, word_count, quality) in enumerate(chunk_specs)
    ]

    quality_scores = [c.quality_score for c in chunks]
    analysis = AnalysisReport(
        video_id=metadata.video_id,
        metadata=metadata,
        # Derived from the chunks so the report stays consistent with the data.
        total_words=sum(c.word_count for c in chunks),
        total_chunks=len(chunks),
        avg_chunk_quality=round(sum(quality_scores) / len(quality_scores), 1),
        top_keywords=[
            {'keyword': 'neural', 'frequency': 12},
            {'keyword': 'networks', 'frequency': 10},
            {'keyword': 'learning', 'frequency': 9},
            {'keyword': 'data', 'frequency': 7},
            {'keyword': 'training', 'frequency': 6},
            {'keyword': 'models', 'frequency': 5},
            {'keyword': 'layers', 'frequency': 5},
            {'keyword': 'activation', 'frequency': 4},
            {'keyword': 'gradient', 'frequency': 3},
            {'keyword': 'optimization', 'frequency': 3},
        ],
        topics=[
            "neural networks fundamentals",
            "deep learning architectures",
            "training optimization",
        ],
        # Quality buckets: high >= 8.5, medium in [7.0, 8.5), low < 7.0.
        high_quality_chunks=sum(1 for q in quality_scores if q >= 8.5),
        medium_quality_chunks=sum(1 for q in quality_scores if 7.0 <= q < 8.5),
        low_quality_chunks=sum(1 for q in quality_scores if q < 7.0),
        processing_time=45.3,
        timestamp=datetime.now(),
    )
    return metadata, chunks, analysis
def main():
    """Generate a lab dashboard from sample data and print a progress report.

    Side effects: creates ./test_output (including parents) and writes the
    generated dashboard HTML file into it via LabDashboardService.
    """
    print("Lab Dashboard Service Test")
    print("=" * 50)

    # Step 1: build the in-memory sample fixtures.
    print("\n1. Creating sample data...")
    metadata, chunks, analysis = create_sample_data()
    print(f" ✓ Video: {metadata.title}")
    print(f" ✓ Chunks: {len(chunks)}")
    print(f" ✓ Analysis: {analysis.total_words} words")

    # Step 2: instantiate the exporter under test.
    print("\n2. Initializing LabDashboardService...")
    service = LabDashboardService()
    print(" ✓ Service ready")

    # Step 3: render the dashboard to disk.
    print("\n3. Generating lab dashboard...")
    output_dir = Path("./test_output")
    # parents=True so the script works even from a fresh working directory.
    output_dir.mkdir(parents=True, exist_ok=True)
    dashboard_path = service.generate_lab_dashboard(
        metadata=metadata,
        chunks=chunks,
        analysis=analysis,
        output_dir=output_dir,
    )
    print(f" ✓ Dashboard generated: {dashboard_path}")
    print(f" ✓ File size: {dashboard_path.stat().st_size / 1024:.1f} KB")

    # Success summary (plain strings — no placeholders, so no f-prefix).
    print("\n" + "=" * 50)
    print("SUCCESS: Lab dashboard generated!")
    print("\nOpen in browser:")
    print(f" file://{dashboard_path.absolute()}")
    print("\nFeatures:")
    print(" • Clinical lab aesthetic (white background, grayscale + blue)")
    print(" • Swiss typography with high contrast")
    print(" • Video player placeholder with metadata strip")
    print(" • Analysis sidebar (summary, concepts, entities, etc.)")
    print(" • Transcript chunks with timestamps")
    print(" • Timeline visualization with chunk markers")
    print("=" * 50)
# Script entry point: run the dashboard generation demo when executed directly.
if __name__ == "__main__":
    main()