"""Financial Advisor Report generation tools."""
import math
import base64
import io
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from jinja2 import Environment, FileSystemLoader, select_autoescape
class AdvisorReportGenerator:
"""Generate comprehensive financial advisor reports with historical analysis and projections."""
def __init__(self, reports_path: str):
"""Initialize advisor report generator.
Args:
reports_path: Path to reports directory containing monthly_returns.csv
and monte_carlo/ subdirectory
"""
self.reports_path = Path(reports_path)
self.monthly_returns_path = self.reports_path / "monthly_returns.csv"
self.monte_carlo_path = self.reports_path / "monte_carlo" / "latest"
self.account_name_map = {
"U4355172": "Interactive Brokers (Corporate)",
"U4213209": "Interactive Brokers (Personal)",
"591-94177-16": "Scotia RRSP",
"51516162": "Questrade TFSA",
"686X42-V": "TD RESP"
}
def load_data(self) -> Dict:
"""Load all required data sources.
Returns:
Dictionary containing:
- monthly_df: Raw monthly returns data for all accounts
- portfolio_df: Aggregated portfolio-level returns
- mc_projections: Monte Carlo projection data
- mc_summary: Monte Carlo simulation metadata
- current_portfolio_value: Most recent portfolio value
- data_coverage: Statistics about data availability
"""
# Load monthly returns CSV
if not self.monthly_returns_path.exists():
raise FileNotFoundError(
f"Monthly returns file not found at {self.monthly_returns_path}. "
"Run update_monthly_returns tool first."
)
monthly_df = pd.read_csv(self.monthly_returns_path)
# Apply account name mapping
monthly_df['Account_Number'] = monthly_df['Account_Number'].map(self.account_name_map).fillna(monthly_df['Account_Number'])
# Load Monte Carlo outputs
mc_projections_path = self.monte_carlo_path / "projections.csv"
mc_summary_path = self.monte_carlo_path / "summary.csv"
if not mc_projections_path.exists():
raise FileNotFoundError(
f"Monte Carlo projections not found at {mc_projections_path}. "
"Run run_monte_carlo_simulation tool first."
)
mc_projections = pd.read_csv(mc_projections_path)
mc_summary = pd.read_csv(mc_summary_path)
# Aggregate portfolio returns
portfolio_df = self.aggregate_portfolio_returns(monthly_df)
# Get current portfolio value (most recent ending balance)
current_value = portfolio_df['ending_balance'].iloc[-1]
# Calculate data coverage statistics - use unique months
unique_months = len(portfolio_df)
benchmark_coverage = {
'sp500_cad': (portfolio_df['sp500_return_cad'].notna()).sum(),
'risk_free_rate': (portfolio_df['risk_free_rate'].notna()).sum(),
}
# Parse Monte Carlo summary (Metric,Value format)
mc_summary_dict = {}
for _, row in mc_summary.iterrows():
metric = row['Metric']
value = row['Value']
if pd.notna(metric) and pd.notna(value):
mc_summary_dict[metric] = value
return {
'monthly_df': monthly_df,
'portfolio_df': portfolio_df,
'mc_projections': mc_projections,
'mc_summary': mc_summary_dict,
'current_portfolio_value': current_value,
'data_coverage': {
'total_account_months': len(monthly_df),
'unique_months': unique_months,
'date_range': f"{portfolio_df['month'].iloc[0]} to {portfolio_df['month'].iloc[-1]}",
'benchmark_coverage_pct': {
key: round(count / unique_months * 100, 1)
for key, count in benchmark_coverage.items()
}
}
}
def aggregate_portfolio_returns(
self, monthly_df: pd.DataFrame, account_numbers: Optional[List[str]] = None
) -> pd.DataFrame:
"""Aggregate account-level returns to portfolio level using Modified Dietz.
Weights returns by beginning balance to calculate combined portfolio return.
Preserves benchmark data from the CSV (S&P 500, risk-free rate).
Args:
monthly_df: Raw monthly returns DataFrame from CSV
account_numbers: Optional list of accounts to include (default: all)
Returns:
DataFrame with columns:
- month: YYYY-MM format
- beginning_balance: Total portfolio beginning balance (CAD)
- ending_balance: Total portfolio ending balance (CAD)
- net_cash_flow: Total deposits - withdrawals (CAD)
- investment_gain: Market gains excluding cash flows (CAD)
- portfolio_return: Time-weighted return (decimal)
- sp500_return_usd: S&P 500 monthly return (decimal)
- sp500_return_cad: S&P 500 CAD-adjusted return (decimal)
- excess_return: Portfolio alpha vs S&P 500 (decimal)
- risk_free_rate: Risk-free monthly rate (decimal)
- excess_over_rf: Excess return over risk-free (decimal)
- accounts_included: List of account numbers in this month
"""
df = monthly_df.copy()
# Filter by account numbers if specified
if account_numbers:
df = df[df['Account_Number'].isin(account_numbers)]
# Get unique months and sort chronologically
unique_months = sorted(df['Month'].unique())
# Aggregate by month
portfolio_data = []
for month in unique_months:
month_data = df[df['Month'] == month]
# Calculate portfolio-level return using Modified Dietz method
total_beginning = month_data['Beginning_Balance_CAD'].sum()
total_ending = month_data['Ending_Balance_CAD'].sum()
total_deposits = month_data['Deposits_CAD'].sum()
total_withdrawals = month_data['Withdrawals_CAD'].sum()
total_net_flow = month_data['Net_Cash_Flow_CAD'].sum()
# Investment gain (market movement excluding cash flows)
investment_gain = total_ending - total_beginning - total_net_flow
# Modified Dietz: Gain / (Beginning + 0.5 * Net_Flow)
denominator = total_beginning + 0.5 * total_net_flow
if denominator != 0:
portfolio_return = investment_gain / denominator
else:
portfolio_return = 0.0 if investment_gain == 0 else float('inf')
# Get benchmark data (same for all accounts in a month, so take first)
first_row = month_data.iloc[0]
# Helper to convert 'N/A' to None
def safe_float(val):
return None if val == 'N/A' else float(val)
# Extract benchmark returns
sp500_return_usd = safe_float(first_row['SP500_Return_USD'])
sp500_return_cad = safe_float(first_row['SP500_Return_CAD'])
risk_free_rate = safe_float(first_row['Risk_Free_Rate'])
# Recalculate excess returns based on aggregated portfolio return
# (Don't use the per-account values from the CSV)
if portfolio_return != float('inf') and portfolio_return is not None:
excess_return = portfolio_return - sp500_return_cad if sp500_return_cad is not None else None
excess_over_rf = portfolio_return - risk_free_rate if risk_free_rate is not None else None
else:
excess_return = None
excess_over_rf = None
portfolio_data.append({
'month': month,
'beginning_balance': total_beginning,
'ending_balance': total_ending,
'deposits': total_deposits,
'withdrawals': total_withdrawals,
'net_cash_flow': total_net_flow,
'investment_gain': investment_gain,
'portfolio_return': portfolio_return if portfolio_return != float('inf') else None,
'sp500_return_usd': sp500_return_usd,
'sp500_return_cad': sp500_return_cad,
'excess_return': excess_return,
'risk_free_rate': risk_free_rate,
'excess_over_rf': excess_over_rf,
'accounts_included': month_data['Account_Number'].unique().tolist()
})
return pd.DataFrame(portfolio_data)
def calculate_period_metrics(
self, portfolio_df: pd.DataFrame, custom_periods: Optional[Dict[str, Tuple[str, str]]] = None
) -> Dict[str, Dict]:
"""Calculate performance metrics for different time periods.
Automatically determines period boundaries based on available data.
Default periods: Early (first 24 months), Middle (next 24 months),
Recent (last 24 months), Full (all data), YTD (current year).
Args:
portfolio_df: Aggregated portfolio returns DataFrame
custom_periods: Optional dict of {period_name: (start_month, end_month)}
Format: {'Q1 2024': ('2024-01', '2024-03')}
Returns:
Dictionary with period names as keys and metrics as values:
{
'period_name': {
'start_date': '2020-01',
'end_date': '2021-12',
'n_months': 24,
'annualized_return': 0.15, # 15%
'annualized_volatility': 0.20, # 20%
'sharpe_ratio': 0.75,
'alpha_vs_sp500': 0.05, # 5% annualized
'beta_vs_sp500': 1.2,
'correlation_vs_sp500': 0.85,
'win_rate': 0.60, # 60% of months beat S&P 500
'cumulative_return': 0.35, # 35% total return
'max_drawdown': -0.15, # -15% peak-to-trough
'avg_excess_over_rf': 0.008 # 0.8% monthly
}
}
"""
# Determine period boundaries
if custom_periods:
periods = custom_periods
else:
# Auto-detect periods based on available data
periods = self._auto_detect_periods(portfolio_df)
results = {}
for period_name, (start_month, end_month) in periods.items():
period_data = portfolio_df[
(portfolio_df['month'] >= start_month) &
(portfolio_df['month'] <= end_month)
].copy()
if len(period_data) == 0:
continue
# Calculate metrics
metrics = self._calculate_metrics_for_period(period_data)
metrics['start_date'] = start_month
metrics['end_date'] = end_month
metrics['n_months'] = len(period_data)
results[period_name] = metrics
return results
def _auto_detect_periods(self, portfolio_df: pd.DataFrame) -> Dict[str, Tuple[str, str]]:
"""Auto-detect period boundaries based on available data.
Returns standard periods: Early, Middle, Recent, Full, YTD.
Adapts to available data automatically.
"""
months = portfolio_df['month'].tolist()
first_month = months[0]
last_month = months[-1]
total_months = len(months)
periods = {}
# Full period
periods['Full Period'] = (first_month, last_month)
# YTD (current year)
current_year = last_month[:4] # Extract YYYY from YYYY-MM
ytd_start = f"{current_year}-01"
if ytd_start in months:
periods['YTD'] = (ytd_start, last_month)
# Divide remaining data into Early, Middle, Recent (if enough data)
if total_months >= 36:
# Split into thirds
third = total_months // 3
early_start = months[0]
early_end = months[third - 1]
periods['Early Period (2020-2021)'] = (early_start, early_end)
middle_start = months[third]
middle_end = months[2 * third - 1]
periods['Middle Period (2022-2023)'] = (middle_start, middle_end)
recent_start = months[2 * third]
recent_end = months[-1]
periods['Recent Period (2024-2025)'] = (recent_start, recent_end)
# Rolling 12-month period (most recent year)
if total_months >= 12:
rolling_start = months[-12]
periods['Recent 12 Months'] = (rolling_start, last_month)
return periods
def _calculate_metrics_for_period(self, period_data: pd.DataFrame) -> Dict:
"""Calculate comprehensive metrics for a single period.
Args:
period_data: Subset of portfolio_df for this period
Returns:
Dictionary of calculated metrics
"""
# Extract returns - use only rows where BOTH portfolio and S&P 500 data exist
# This handles periods with sparse benchmark data more gracefully
valid_mask = period_data['portfolio_return'].notna() & period_data['sp500_return_cad'].notna()
valid_data = period_data[valid_mask].copy()
portfolio_returns = period_data['portfolio_return'].dropna().values
excess_over_rf = period_data['excess_over_rf'].dropna().values
# For benchmark comparisons, use only overlapping valid data
portfolio_returns_paired = valid_data['portfolio_return'].values
sp500_returns_paired = valid_data['sp500_return_cad'].values
excess_returns = valid_data['excess_return'].values
if len(portfolio_returns) == 0:
return {'error': 'No valid return data for this period'}
# Basic statistics (use all portfolio data)
mean_monthly = np.mean(portfolio_returns)
monthly_vol = np.std(portfolio_returns, ddof=1)
# Annualize
annualized_return = mean_monthly * 12
annualized_vol = monthly_vol * math.sqrt(12)
# Sharpe ratio (using excess over risk-free)
if len(excess_over_rf) > 0 and monthly_vol > 0:
sharpe_ratio = np.mean(excess_over_rf) / monthly_vol * math.sqrt(12)
else:
sharpe_ratio = None
# Cumulative return (geometric)
cumulative_return = np.prod(1 + portfolio_returns) - 1
# Alpha vs S&P 500 (using paired/overlapping data only)
if len(portfolio_returns_paired) > 0 and len(sp500_returns_paired) > 0:
alpha_monthly = np.mean(portfolio_returns_paired) - np.mean(sp500_returns_paired)
alpha_annualized = alpha_monthly * 12
else:
alpha_annualized = None
# Beta and correlation (using paired/overlapping data only)
if len(portfolio_returns_paired) > 1 and len(sp500_returns_paired) > 1:
covariance = np.cov(portfolio_returns_paired, sp500_returns_paired)[0, 1]
sp500_variance = np.var(sp500_returns_paired, ddof=1)
beta = covariance / sp500_variance if sp500_variance > 0 else None
correlation = np.corrcoef(portfolio_returns_paired, sp500_returns_paired)[0, 1]
else:
beta = None
correlation = None
# Win rate (% of months beating S&P 500)
if len(excess_returns) > 0:
win_rate = (excess_returns > 0).sum() / len(excess_returns)
else:
win_rate = None
# Maximum drawdown
max_drawdown = self._calculate_max_drawdown(portfolio_returns)
# Average excess over risk-free
avg_excess_rf = np.mean(excess_over_rf) if len(excess_over_rf) > 0 else None
return {
'annualized_return': annualized_return,
'annualized_volatility': annualized_vol,
'sharpe_ratio': sharpe_ratio,
'alpha_vs_sp500': alpha_annualized,
'beta_vs_sp500': beta,
'correlation_vs_sp500': correlation,
'win_rate': win_rate,
'cumulative_return': cumulative_return,
'max_drawdown': max_drawdown,
'avg_excess_over_rf': avg_excess_rf,
'mean_monthly_return': mean_monthly,
'monthly_volatility': monthly_vol
}
def _calculate_max_drawdown(self, returns: np.ndarray) -> float:
"""Calculate maximum drawdown from peak to trough.
Args:
returns: Array of monthly returns (decimal format)
Returns:
Maximum drawdown as negative decimal (e.g., -0.15 for 15% drawdown)
"""
# Calculate cumulative returns
cumulative = np.cumprod(1 + returns)
# Calculate running maximum
running_max = np.maximum.accumulate(cumulative)
# Calculate drawdowns
drawdowns = (cumulative - running_max) / running_max
# Return maximum drawdown (most negative value)
return np.min(drawdowns)
def create_visualizations(self, portfolio_df: pd.DataFrame, monthly_df: pd.DataFrame) -> Dict:
"""Create all visualizations for the report.
Args:
portfolio_df: Aggregated portfolio-level returns
monthly_df: Raw account-level monthly data
Returns:
Dictionary with base64-encoded matplotlib charts and plotly JSON
"""
visualizations = {}
# Static matplotlib charts (base64 encoded)
visualizations['cumulative_returns'] = self._create_cumulative_returns_chart(portfolio_df)
visualizations['account_allocation'] = self._create_account_allocation_chart(monthly_df)
visualizations['ytd_returns_by_account'] = self._create_ytd_returns_chart(monthly_df)
visualizations['monthly_excess_returns'] = self._create_excess_returns_chart(portfolio_df)
visualizations['max_drawdown'] = self._create_drawdown_chart(portfolio_df)
# Interactive plotly charts (JSON)
visualizations['rolling_sharpe'] = self._create_rolling_sharpe_chart(portfolio_df)
visualizations['portfolio_vs_sp500_scatter'] = self._create_scatter_chart(portfolio_df)
visualizations['volatility_evolution'] = self._create_volatility_chart(portfolio_df)
# Monte Carlo visualization paths
mc_path = self.monte_carlo_path
visualizations['monte_carlo'] = {
'fan_chart': str(mc_path / 'fan_chart.png') if (mc_path / 'fan_chart.png').exists() else None,
'distribution': str(mc_path / 'distribution_histogram.png') if (mc_path / 'distribution_histogram.png').exists() else None,
'confidence_intervals': str(mc_path / 'confidence_intervals.png') if (mc_path / 'confidence_intervals.png').exists() else None
}
return visualizations
def _create_cumulative_returns_chart(self, portfolio_df: pd.DataFrame) -> str:
"""Create cumulative returns chart (Portfolio vs S&P 500)."""
fig, ax = plt.subplots(figsize=(10, 6))
# Calculate cumulative returns (base 100)
portfolio_returns = portfolio_df['portfolio_return'].dropna().values
sp500_returns = portfolio_df['sp500_return_cad'].dropna().values
portfolio_cumulative = np.cumprod(1 + portfolio_returns) * 100
sp500_cumulative = np.cumprod(1 + sp500_returns) * 100
months = portfolio_df['month'].values[:len(portfolio_cumulative)]
# Plot
ax.plot(months, portfolio_cumulative, label='Portfolio', linewidth=2, color='#1e3a5f')
ax.plot(months[:len(sp500_cumulative)], sp500_cumulative, label='S&P 500 (CAD)', linewidth=2, color='#d4af37', alpha=0.7)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Cumulative Return (Base 100)', fontsize=12)
ax.set_title('Cumulative Returns: Portfolio vs S&P 500', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
# Show every 6th month
tick_positions = range(0, len(months), 6)
ax.set_xticks([months[i] for i in tick_positions])
plt.xticks(rotation=45, ha='right')
# Manually adjust bottom margin and apply tight layout
fig.subplots_adjust(bottom=0.25)
plt.tight_layout()
return self._fig_to_base64(fig)
def _create_account_allocation_chart(self, monthly_df: pd.DataFrame) -> str:
"""Create pie chart of account allocation (most recent month)."""
# Get most recent month
latest_month = monthly_df['Month'].max()
latest_data = monthly_df[monthly_df['Month'] == latest_month]
# Group by account
account_balances = latest_data.groupby('Account_Number')['Ending_Balance_CAD'].sum()
fig, ax = plt.subplots(figsize=(8, 8))
colors = ['#1e3a5f', '#d4af37', '#2e5f8f', '#c49f47', '#4e7faf']
wedges, texts, autotexts = ax.pie(
account_balances.values,
labels=account_balances.index,
autopct='%1.1f%%',
startangle=90,
colors=colors
)
for autotext in autotexts:
autotext.set_color('white')
autotext.set_fontsize(10)
autotext.set_fontweight('bold')
ax.set_title(f'Portfolio Allocation by Account ({latest_month})', fontsize=14, fontweight='bold')
plt.tight_layout()
return self._fig_to_base64(fig)
def _create_ytd_returns_chart(self, monthly_df: pd.DataFrame) -> str:
"""Create bar chart of YTD returns by account."""
# Get current year
current_year = monthly_df['Month'].max()[:4]
ytd_data = monthly_df[monthly_df['Month'].str.startswith(current_year)]
# Calculate YTD return per account (geometric)
ytd_returns = {}
for account in ytd_data['Account_Number'].unique():
account_data = ytd_data[ytd_data['Account_Number'] == account]
returns = account_data['Return'].apply(lambda x: float(x) if x != 'N/A' else 0).values
ytd_return = np.prod(1 + returns) - 1
ytd_returns[account] = ytd_return * 100
fig, ax = plt.subplots(figsize=(10, 6))
accounts = list(ytd_returns.keys())
returns = list(ytd_returns.values())
colors = ['#2e8b57' if r >= 0 else '#dc143c' for r in returns]
ax.bar(accounts, returns, color=colors)
ax.set_xlabel('Account', fontsize=12)
ax.set_ylabel('YTD Return (%)', fontsize=12)
ax.set_title(f'{current_year} YTD Returns by Account', fontsize=14, fontweight='bold')
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
ax.grid(True, alpha=0.3, axis='y')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
return self._fig_to_base64(fig)
def _create_excess_returns_chart(self, portfolio_df: pd.DataFrame) -> str:
"""Create bar chart of monthly excess returns (green/red)."""
fig, ax = plt.subplots(figsize=(12, 6))
months = portfolio_df['month'].values
excess = portfolio_df['excess_return'].values
# Filter out None values
valid_mask = ~pd.isna(excess)
months_filtered = months[valid_mask]
excess_filtered = excess[valid_mask] * 100 # Convert to percentage
colors = ['#2e8b57' if e >= 0 else '#dc143c' for e in excess_filtered]
ax.bar(months_filtered, excess_filtered, color=colors, alpha=0.7)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Excess Return vs S&P 500 (%)', fontsize=12)
ax.set_title('Monthly Excess Returns (Portfolio - S&P 500)', fontsize=14, fontweight='bold')
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
ax.grid(True, alpha=0.3, axis='y')
# Show every 6th month
tick_positions = range(0, len(months_filtered), 6)
ax.set_xticks([months_filtered[i] for i in tick_positions])
plt.xticks(rotation=45, ha='right')
# Manually adjust bottom margin
fig.subplots_adjust(bottom=0.25)
plt.tight_layout()
return self._fig_to_base64(fig)
def _create_drawdown_chart(self, portfolio_df: pd.DataFrame) -> str:
"""Create area chart showing drawdown over time."""
fig, ax = plt.subplots(figsize=(12, 6))
returns = portfolio_df['portfolio_return'].dropna().values
months = portfolio_df['month'].values[:len(returns)]
# Calculate cumulative returns and drawdowns
cumulative = np.cumprod(1 + returns)
running_max = np.maximum.accumulate(cumulative)
drawdowns = (cumulative - running_max) / running_max * 100 # Convert to percentage
ax.fill_between(months, drawdowns, 0, where=(drawdowns < 0),
color='#dc143c', alpha=0.3, label='Drawdown')
ax.plot(months, drawdowns, color='#dc143c', linewidth=1.5)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Drawdown (%)', fontsize=12)
ax.set_title('Portfolio Drawdown Over Time', fontsize=14, fontweight='bold')
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
ax.grid(True, alpha=0.3)
# Show every 6th month
tick_positions = range(0, len(months), 6)
ax.set_xticks([months[i] for i in tick_positions])
plt.xticks(rotation=45, ha='right')
# Manually adjust bottom margin and apply tight layout
fig.subplots_adjust(bottom=0.25)
plt.tight_layout()
return self._fig_to_base64(fig)
def _create_rolling_sharpe_chart(self, portfolio_df: pd.DataFrame) -> str:
"""Create interactive plotly chart of rolling 12-month Sharpe ratio."""
# Calculate rolling 12-month Sharpe
rolling_sharpe = []
months = []
for i in range(12, len(portfolio_df) + 1):
window_data = portfolio_df.iloc[i-12:i]
excess_rf = window_data['excess_over_rf'].dropna().values
returns = window_data['portfolio_return'].dropna().values
if len(excess_rf) > 0 and len(returns) > 0:
sharpe = np.mean(excess_rf) / np.std(returns, ddof=1) * math.sqrt(12)
rolling_sharpe.append(sharpe)
months.append(window_data['month'].iloc[-1])
print(f"DEBUG: Rolling Sharpe data points: {len(rolling_sharpe)}")
print(f"DEBUG: Rolling Sharpe months: {len(months)}")
# Create plotly figure
fig = go.Figure()
fig.add_trace(go.Scatter(
x=months,
y=rolling_sharpe,
mode='lines',
name='Rolling 12-Month Sharpe',
line=dict(color='#1e3a5f', width=2),
hovertemplate='<b>%{x}</b><br>Sharpe: %{y:.2f}<extra></extra>'
))
# Add reference line at 1.0 (good Sharpe)
fig.add_hline(y=1.0, line_dash="dash", line_color="gray",
annotation_text="Sharpe = 1.0 (Good)")
fig.update_layout(
title='Rolling 12-Month Sharpe Ratio',
xaxis_title='Month',
yaxis_title='Sharpe Ratio',
hovermode='x unified',
template='plotly_white',
height=400
)
return fig.to_json()
def _create_scatter_chart(self, portfolio_df: pd.DataFrame) -> str:
"""Create interactive scatter plot: Portfolio vs S&P 500 returns with regression."""
# Filter data with both portfolio and S&P 500 returns
valid_data = portfolio_df[
portfolio_df['portfolio_return'].notna() &
portfolio_df['sp500_return_cad'].notna()
].copy()
portfolio_returns = valid_data['portfolio_return'].values * 100 # Convert to %
sp500_returns = valid_data['sp500_return_cad'].values * 100
# Calculate regression line
coeffs = np.polyfit(sp500_returns, portfolio_returns, 1)
regression_line = np.poly1d(coeffs)
x_line = np.linspace(sp500_returns.min(), sp500_returns.max(), 100)
y_line = regression_line(x_line)
# Create figure
fig = go.Figure()
# Scatter points (color by time index)
month_indices = list(range(len(valid_data)))
# Get first and last month for color bar labels
first_month = valid_data['month'].iloc[0]
last_month = valid_data['month'].iloc[-1]
fig.add_trace(go.Scatter(
x=sp500_returns.tolist(), # Convert to list to avoid binary encoding
y=portfolio_returns.tolist(), # Convert to list to avoid binary encoding
mode='markers',
name='Monthly Returns',
marker=dict(
size=10,
color=month_indices,
colorscale='Viridis',
showscale=True,
colorbar=dict(
title="Time Period",
tickvals=[0, len(month_indices)//2, len(month_indices)-1],
ticktext=[first_month, '', last_month],
len=0.7
),
opacity=0.8,
line=dict(width=1, color='DarkSlateGray')
),
text=valid_data['month'].tolist(), # Convert to list
hovertemplate='<b>%{text}</b><br>S&P 500: %{x:.2f}%<br>Portfolio: %{y:.2f}%<extra></extra>'
))
# Regression line
fig.add_trace(go.Scatter(
x=x_line.tolist(), # Convert to list
y=y_line.tolist(), # Convert to list
mode='lines',
name=f'Regression (β={coeffs[0]:.2f})',
line=dict(color='#dc143c', width=2, dash='dash')
))
# Add diagonal line (y=x)
min_val = min(sp500_returns.min(), portfolio_returns.min())
max_val = max(sp500_returns.max(), portfolio_returns.max())
fig.add_trace(go.Scatter(
x=[min_val, max_val],
y=[min_val, max_val],
mode='lines',
name='Equal Returns (y=x)',
line=dict(color='gray', width=1, dash='dot')
))
fig.update_layout(
title={
'text': 'Portfolio vs S&P 500 Monthly Returns',
'x': 0.5,
'xanchor': 'center'
},
xaxis_title='S&P 500 Return (%)',
yaxis_title='Portfolio Return (%)',
hovermode='closest',
template='plotly_white',
height=600,
showlegend=True,
legend=dict(
orientation="v",
yanchor="top",
y=0.99,
xanchor="left",
x=0.01,
bgcolor="rgba(255, 255, 255, 0.9)",
bordercolor="rgba(0, 0, 0, 0.2)",
borderwidth=1
),
xaxis=dict(
zeroline=True,
zerolinewidth=2,
zerolinecolor='rgba(0,0,0,0.2)',
gridcolor='rgba(0,0,0,0.05)'
),
yaxis=dict(
zeroline=True,
zerolinewidth=2,
zerolinecolor='rgba(0,0,0,0.2)',
gridcolor='rgba(0,0,0,0.05)'
)
)
return fig.to_json()
def _create_volatility_chart(self, portfolio_df: pd.DataFrame) -> str:
"""Create interactive chart showing rolling 6-month volatility."""
# Calculate rolling 6-month volatility
rolling_vol = []
months = []
for i in range(6, len(portfolio_df) + 1):
window_data = portfolio_df.iloc[i-6:i]
returns = window_data['portfolio_return'].dropna().values
if len(returns) > 0:
monthly_vol = np.std(returns, ddof=1)
annualized_vol = monthly_vol * math.sqrt(12) * 100 # Convert to %
rolling_vol.append(annualized_vol)
months.append(window_data['month'].iloc[-1])
# Create figure
fig = go.Figure()
fig.add_trace(go.Scatter(
x=months,
y=rolling_vol,
mode='lines',
name='Rolling 6-Month Volatility',
fill='tozeroy',
line=dict(color='#1e3a5f', width=2),
fillcolor='rgba(30, 58, 95, 0.2)',
hovertemplate='<b>%{x}</b><br>Volatility: %{y:.2f}%<extra></extra>'
))
fig.update_layout(
title='Rolling 6-Month Volatility (Annualized)',
xaxis_title='Month',
yaxis_title='Volatility (%)',
hovermode='x unified',
template='plotly_white',
height=400
)
return fig.to_json()
def _fig_to_base64(self, fig) -> str:
"""Convert matplotlib figure to base64-encoded string."""
buffer = io.BytesIO()
fig.savefig(buffer, format='png', dpi=100, bbox_inches='tight')
buffer.seek(0)
image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
plt.close(fig)
return f"data:image/png;base64,{image_base64}"
def _transform_mc_projections_for_table(self, mc_projections: pd.DataFrame) -> List[Dict]:
"""Transform Monte Carlo projections from year-based to percentile-based format.
Converts from:
Year,P10_Percentile,P25_Percentile,P50_Median,P75_Percentile,P90_Percentile
1,3912983.71,4397714.92,4982812.41,5660175.46,6338517.74
2,4136171.92,4887087.4,5847577.44,6961924.37,8185580.79
To:
Percentile,Year 1,Year 2,Year 3,Year 4,Year 5
P10,3912983.71,4136171.92,...
P25,4397714.92,4887087.4,...
Args:
mc_projections: DataFrame with columns Year, P10_Percentile, P25_Percentile, etc.
Returns:
List of dictionaries, one per percentile
"""
percentile_columns = {
'P10_Percentile': 'P10',
'P25_Percentile': 'P25',
'P50_Median': 'P50',
'P75_Percentile': 'P75',
'P90_Percentile': 'P90'
}
result = []
for col_name, percentile_label in percentile_columns.items():
row_data = {'Percentile': percentile_label}
# Extract values for each year
for idx, year_row in mc_projections.iterrows():
year_num = int(year_row['Year'])
year_col = f'Year {year_num}'
row_data[year_col] = year_row[col_name]
result.append(row_data)
return result
def generate_report_data(self) -> Dict:
"""Generate all data needed for the financial advisor report.
This is the main method that orchestrates data loading, aggregation,
and metric calculation for all report sections.
Returns:
Dictionary containing all report data ready for visualization/rendering
"""
# Load all data sources
data = self.load_data()
# Calculate period metrics
period_metrics = self.calculate_period_metrics(data['portfolio_df'])
# Generate all visualizations
visualizations = self.create_visualizations(
portfolio_df=data['portfolio_df'],
monthly_df=data['monthly_df']
)
# Monte Carlo summary is already a dict
mc_summary = data['mc_summary']
# Transform Monte Carlo projections for the table display
mc_projections_table = self._transform_mc_projections_for_table(data['mc_projections'])
# Generate recommendations (still use original format for calculations)
recommendations = self.generate_recommendations(
period_metrics=period_metrics,
mc_data={'projections': data['mc_projections'].to_dict('records'), 'summary': mc_summary},
portfolio_summary={
'current_value': data['current_portfolio_value'],
'date_range': data['data_coverage']['date_range'],
'total_months': data['data_coverage']['unique_months'],
'benchmark_coverage': data['data_coverage']['benchmark_coverage_pct']
}
)
# Package everything for the report
return {
'portfolio_summary': {
'current_value': data['current_portfolio_value'],
'date_range': data['data_coverage']['date_range'],
'total_months': data['data_coverage']['unique_months'],
'benchmark_coverage': data['data_coverage']['benchmark_coverage_pct']
},
'period_metrics': period_metrics,
'visualizations': visualizations,
'monte_carlo': {
'summary': mc_summary,
'projections': mc_projections_table
},
'recommendations': recommendations, # Add recommendations here
'monthly_data': data['portfolio_df'].to_dict('records'),
'raw_data': {
'portfolio_df': data['portfolio_df'],
'monthly_df': data['monthly_df'],
'mc_projections': data['mc_projections']
}
}
def generate_recommendations(self, period_metrics: Dict, mc_data: Dict, portfolio_summary: Dict) -> List[Dict]:
"""Generate prioritized recommendations based on portfolio metrics and Monte Carlo projections."""
recommendations = []
recent_metrics = period_metrics.get('Recent 12 Months', {})
full_period_metrics = period_metrics.get('Full Period', {})
# Rule 1: High volatility check (>15% annualized)
if recent_metrics.get('annualized_volatility', 0) > 0.15:
recommendations.append({
'priority': 'High',
'title': 'Volatility Management',
'principle': (
f"Your recent annualized volatility of {recent_metrics['annualized_volatility']*100:.1f}% "
"is significantly higher than typical diversified portfolios, increasing risk."
),
'options': [
{'label': 'Add Stability', 'description': 'Allocate 10-15% to investment-grade bonds or GICs.', 'impact': 'Reduce overall portfolio volatility and potential drawdowns.'},
{'label': 'Diversify Further', 'description': 'Add 15-20% to low-correlation assets (e.g., REITs, commodities).', 'impact': 'Improve risk-adjusted returns by smoothing out portfolio fluctuations.'},
{'label': 'Reduce Concentration', 'description': 'Cap any single position at 5% of the portfolio.', 'impact': 'Mitigate idiosyncratic risk from individual holdings.'}
],
'trade_offs': 'Lower volatility may reduce upside potential but improves portfolio stability and sleep-at-night factor.',
'current_data': {'annualized_volatility': recent_metrics['annualized_volatility']}
})
# Rule 2: Exceptional Sharpe (>3.0) -> Sustainability warning
# Note: The plan mentions 5.89, but the actual calculated is 1.76.
# I will use the plan's threshold of 3.0 for the rule.
if recent_metrics.get('sharpe_ratio', 0) > 3.0:
recommendations.append({
'priority': 'High',
'title': 'Preserve Recent Alpha (Sustainability Focus)',
'principle': (
f"Your recent 12-month Sharpe Ratio of {recent_metrics['sharpe_ratio']:.2f} "
"represents exceptional risk-adjusted performance that is unlikely to persist long-term. "
"It's crucial to consider its sustainability."
),
'options': [
{'label': 'Lock in Gains', 'description': 'Reduce position sizes by 20-30% on high-performing assets.', 'impact': 'Secure profits and reduce exposure to potential mean reversion.'},
{'label': 'Protective Strategy', 'description': 'Implement trailing stops at 15% below peak for volatile positions.', 'impact': 'Limit downside risk while allowing for continued upside participation.'},
{'label': 'Document & Monitor', 'description': 'Record current strategy and set quarterly review checkpoints with a Sharpe threshold of 2.0.', 'impact': 'Understand drivers of performance and adapt strategy as market conditions change.'}
],
'trade_offs': 'Reducing exposure may cap future upside but provides protection against a reversion to historical averages.',
'current_data': {'sharpe_ratio': recent_metrics['sharpe_ratio']}
})
# Rule 3: Wide projection range -> Scenario planning
# mc_data['projections'] is a list of dicts, each dict is a row.
# Need to find the P10 and P90 for the 5-year projection.
# The 'Year' column in mc_projections seems to be the projection year.
# Assuming 'Year 5' is the relevant one.
p10_value = None
p90_value = None
for proj in mc_data['projections']:
if proj.get('Percentile') == 'P10':
p10_value = proj.get('Year 5')
if proj.get('Percentile') == 'P90':
p90_value = proj.get('Year 5')
if p10_value is not None and p90_value is not None:
projection_range = p90_value - p10_value
# Define a threshold for "wide" range, e.g., if P90 is more than 2x P10
if p10_value > 0 and p90_value / p10_value > 2.0: # Example threshold
recommendations.append({
'priority': 'High',
'title': 'Scenario-Based Financial Planning',
'principle': (
f"Your 5-year Monte Carlo projections show a wide range from "
f"${p10_value:,.0f} (P10) to ${p90_value:,.0f} (P90). "
"This wide dispersion necessitates conservative financial planning."
),
'options': [
{'label': 'Base Spending on P25', 'description': 'Align essential spending with the P25 projection to ensure a safety margin.', 'impact': 'Reduces the risk of running out of funds in less favorable market conditions.'},
{'label': 'Hybrid Approach', 'description': 'Fund essential spending from P25 and discretionary spending from P50 projections.', 'impact': 'Balances current lifestyle with long-term financial security.'},
{'label': 'Annual Recalibration', 'description': 'Update projections yearly and adjust financial plans accordingly.', 'impact': 'Ensures your plan remains aligned with evolving market realities and personal goals.'}
],
'trade_offs': 'Conservative planning may delay some financial goals but significantly reduces the risk of shortfall.',
'current_data': {'P10_projection': p10_value, 'P90_projection': p90_value}
})
# Rule 4: Tax optimization based on account types (Placeholder)
# This requires more detailed account type information which is not directly in period_metrics or mc_data.
# For a real implementation, `monthly_df` would be needed to analyze account types.
# I will add a generic recommendation for now.
recommendations.append({
'priority': 'Medium',
'title': 'Tax Optimization Strategy',
'principle': (
"Reviewing your portfolio's tax efficiency can significantly enhance after-tax returns, "
"especially across different account types (e.g., tax-sheltered vs. taxable)."
),
'options': [
{'label': 'Asset Location', 'description': 'Place income-generating assets in tax-advantaged accounts and growth assets in taxable accounts.', 'impact': 'Minimize annual tax drag on investment returns.'},
{'label': 'Tax-Loss Harvesting', 'description': 'Periodically sell investments at a loss to offset capital gains and ordinary income.', 'impact': 'Reduce current year tax liability and potentially carry forward losses.'},
{'label': 'Contribution Review', 'description': 'Ensure optimal contributions to tax-advantaged accounts (e.g., RRSP, TFSA) based on your income and goals.', 'impact': 'Maximize tax-deferred growth and tax-free withdrawals.'}
],
'trade_offs': 'Tax strategies can be complex and may require professional advice; benefits vary based on individual tax situation.',
'current_data': {} # No specific data from current inputs
})
# Rule 5: Drawdown protection based on historical max
if full_period_metrics.get('max_drawdown', 0) < -0.10: # If max drawdown is worse than -10%
recommendations.append({
'priority': 'Medium',
'title': 'Drawdown Protection Strategy',
'principle': (
f"Your portfolio has experienced a maximum drawdown of {full_period_metrics['max_drawdown']*100:.1f}% "
"historically. Implementing strategies to mitigate future drawdowns is prudent."
),
'options': [
{'label': 'Diversify Asset Classes', 'description': 'Include assets with low correlation to equities, such as alternatives or commodities.', 'impact': 'Reduce the severity and duration of portfolio downturns.'},
{'label': 'Dynamic Asset Allocation', 'description': 'Adjust equity exposure based on market conditions or valuation metrics.', 'impact': 'Proactively reduce risk during periods of heightened market vulnerability.'},
{'label': 'Hedging Strategies', 'description': 'Consider using options or other derivatives to hedge against significant market declines.', 'impact': 'Provide direct protection against adverse market movements, though with associated costs.'}
],
'trade_offs': 'Drawdown protection strategies can incur costs or limit upside participation during bull markets.',
'current_data': {'max_drawdown': full_period_metrics['max_drawdown']}
})
# Sort recommendations by priority
priority_order = {'High': 0, 'Medium': 1, 'Low': 2}
return sorted(recommendations, key=lambda r: priority_order.get(r['priority'], 99))
def render_html(self, report_data: Dict) -> str:
"""Render the HTML report using Jinja2."""
# Set up Jinja2 environment
templates_dir = Path(__file__).parent.parent.parent / "templates"
env = Environment(
loader=FileSystemLoader(templates_dir),
autoescape=select_autoescape(['html', 'xml'])
)
# Add custom filters for currency and percentage formatting
def currency_filter(value):
if isinstance(value, (int, float)):
return f"${value:,.2f}"
return value
def percent_filter(value):
if isinstance(value, (int, float)):
return f"{value:.2%}"
return value
env.filters['currency'] = currency_filter
env.filters['percent'] = percent_filter
template = env.get_template("advisor_report.html")
# Render the template with the report data
html_output = template.render(
portfolio_summary=report_data['portfolio_summary'],
period_metrics=report_data['period_metrics'],
visualizations=report_data['visualizations'],
monte_carlo=report_data['monte_carlo'],
recommendations=report_data['recommendations'],
current_date=datetime.now().strftime('%B %d, %Y'),
current_year=datetime.now().year
)
return html_output