#!/usr/bin/env python3
"""Check benchmark data coverage by period."""
import sys
from pathlib import Path
import pandas as pd
# Add src to path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from tools.advisor_report import AdvisorReportGenerator
def _coverage_line(valid, total):
    """Format ``valid/total`` with a percentage, or ``n/a`` for an empty period."""
    if total == 0:
        # An empty period has no meaningful percentage; avoid dividing by zero.
        return f"{valid}/{total} (n/a)"
    return f"{valid}/{total} ({valid / total * 100:.1f}%)"


def _preview(months, limit=5):
    """Join the first ``limit`` months as text and note how many were left out."""
    # str() so that non-string month values (e.g. Period/Timestamp) can be joined.
    shown = ", ".join(str(month) for month in months[:limit])
    extra = len(months) - limit
    return shown + (f"... and {extra} more" if extra > 0 else "")


def main():
    """Print, for each detected period, how many months have return data.

    Loads the data through ``AdvisorReportGenerator``, reads the
    ``'portfolio_df'`` entry, and for every period yielded by the generator's
    ``_auto_detect_periods`` prints the number of rows in the period and the
    number of non-null values in the ``portfolio_return`` and
    ``sp500_return_cad`` columns. Months that have a portfolio return but no
    S&P 500 return are reported as missing benchmark data.
    """
    # NOTE(review): this resolves relative to the *parent* of this file's
    # directory, whereas the sys.path entry at the top of the file uses this
    # file's own directory -- confirm both match the repository layout.
    reports_path = Path(__file__).parent.parent / "data" / "reports"
    generator = AdvisorReportGenerator(str(reports_path))

    # Load data
    data = generator.load_data()
    portfolio_df = data['portfolio_df']

    # Get periods: each item unpacks as name -> (start_month, end_month).
    periods = generator._auto_detect_periods(portfolio_df)

    print("\n" + "="*80)
    print("BENCHMARK DATA COVERAGE BY PERIOD")
    print("="*80)

    for period_name, (start_month, end_month) in periods.items():
        # Both bounds are inclusive.
        in_period = (
            (portfolio_df['month'] >= start_month)
            & (portfolio_df['month'] <= end_month)
        )
        period_data = portfolio_df[in_period]

        has_portfolio = period_data['portfolio_return'].notna()
        has_sp500 = period_data['sp500_return_cad'].notna()

        total_months = len(period_data)
        portfolio_valid = int(has_portfolio.sum())
        sp500_valid = int(has_sp500.sum())

        print(f"\n{period_name}")
        print(f" Date Range: {start_month} to {end_month}")
        print(f" Total Months: {total_months}")
        print(f" Portfolio Returns Available: {_coverage_line(portfolio_valid, total_months)}")
        print(f" S&P 500 Returns Available: {_coverage_line(sp500_valid, total_months)}")

        # Decide row by row which months lack a benchmark. Comparing the two
        # totals instead would hide gaps whenever the S&P 500 column has
        # values in months where the portfolio has none.
        missing_months = period_data.loc[has_portfolio & ~has_sp500, 'month'].tolist()
        if missing_months:
            print(f" ⚠️ MISSING S&P 500 DATA: {len(missing_months)} months")
            # Show which months are missing
            print(f" Missing months: {_preview(missing_months)}")

    print("\n" + "="*80)
# Run the coverage report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()