"""Data models and schemas for investment statements."""
from datetime import datetime
from typing import Optional, Literal
from pydantic import BaseModel, Field
# ============================================================================
# Core Models
# ============================================================================
class AccountSummary(BaseModel):
    """Summary figures for a single account on a single statement.

    Covers the account's identity, balances, FX rate, optional TFSA
    contribution tracking, cash flow over the statement period and
    performance returns. Fields declared without a default are required.
    """

    # --- Identity -----------------------------------------------------
    account_number: str
    # Account type, e.g. "TFSA", "RRSP", "Margin".
    account_type: str
    # Institution name, e.g. "Questrade", "Interactive Brokers".
    institution: str
    statement_date: datetime
    previous_statement_date: Optional[datetime] = None

    # --- Balances -----------------------------------------------------
    current_balance_cad: float
    previous_balance_cad: Optional[float] = None
    cash_cad: float
    cash_usd: float
    securities_value_cad: float

    # --- FX rate ------------------------------------------------------
    fx_rate_usd_to_cad: Optional[float] = None

    # --- TFSA-specific figures (left as None when not provided) -------
    tfsa_contributions_ytd: Optional[float] = None
    tfsa_contributions_total: Optional[float] = None
    tfsa_withdrawals_total: Optional[float] = None

    # --- Cash flow during the statement period ------------------------
    opening_balance_cad: float
    deposits_cad: float = 0.0
    withdrawals_cad: float = 0.0
    change_in_balance_cad: float = 0.0

    # --- Performance returns, expressed as percentages ----------------
    # Month-to-date.
    return_mtd: Optional[float] = None
    # Quarter-to-date.
    return_qtd: Optional[float] = None
    # Year-to-date.
    return_ytd: Optional[float] = None
    # Trailing 1 year.
    return_1y: Optional[float] = None
    # Trailing 3 years, annualized.
    return_3y: Optional[float] = None
    # Trailing 5 years, annualized.
    return_5y: Optional[float] = None
    # Since the account was opened.
    return_since_inception: Optional[float] = None
class Holding(BaseModel):
    """A single security position held in an account.

    Records the position's identity, cost basis, market value and
    performance figures, plus optional position-level history.
    """

    # --- Identity -----------------------------------------------------
    symbol: str
    description: str
    quantity: float
    # Only "CAD" and "USD" are accepted.
    currency: Literal["CAD", "USD"]

    # --- Cost basis ---------------------------------------------------
    cost_per_share: float
    # Cost of the whole position.
    total_cost: float
    # Cost-basis code such as BK, HMV, HMVD, ND (plain string, not validated).
    cost_basis_type: str = "BK"

    # --- Market value -------------------------------------------------
    market_price: float
    market_value: float
    # Always expressed in CAD so positions can be aggregated.
    market_value_cad: float

    # --- Performance --------------------------------------------------
    profit_loss: float
    percent_return: float
    percent_portfolio: float

    # --- Additional info ----------------------------------------------
    # Security kind such as ETF, Stock, Option, Bond (plain string).
    security_type: str = "ETF"
    segregated: bool = False

    # --- Position-level history (tracking a position over time) -------
    first_purchase_date: Optional[datetime] = None
    holding_period_days: Optional[int] = None
    # Return normalized to a 1-year period.
    annualized_return: Optional[float] = None
class Transaction(BaseModel):
    """One activity line from a statement (trade, dividend, cash movement, fee).

    The two dates, the activity type and the description are required;
    security details are optional and monetary amounts default to 0.0.
    """

    transaction_date: datetime
    settle_date: datetime
    # Activity code, e.g. "BUY", "SELL", "DIV", "DEP", "WITH", "FEE".
    activity_type: str

    # Security details; left as None when not supplied.
    symbol: Optional[str] = None
    description: str
    quantity: Optional[float] = None
    price: Optional[float] = None

    # Monetary amounts.
    gross_amount: float = 0.0
    commission: float = 0.0
    net_amount: float = 0.0
    # Only "CAD" and "USD" are accepted; defaults to "CAD".
    currency: Literal["CAD", "USD"] = "CAD"
class Benchmark(BaseModel):
    """Returns of a reference index, for comparison against the portfolio.

    Only ``name`` is required; every return and alpha figure is optional.
    """

    # Display name, e.g. "S&P/TSX Composite", "S&P 500".
    name: str
    # Ticker, e.g. "^GSPTSE", "^GSPC".
    symbol: Optional[str] = None

    # --- Benchmark returns per comparison period (percentages) --------
    return_mtd: Optional[float] = None
    return_qtd: Optional[float] = None
    return_ytd: Optional[float] = None
    return_1y: Optional[float] = None
    return_3y: Optional[float] = None
    return_5y: Optional[float] = None

    # --- Relative performance: portfolio return minus benchmark return
    alpha_ytd: Optional[float] = None
    alpha_1y: Optional[float] = None
    alpha_since_inception: Optional[float] = None
class RiskMetrics(BaseModel):
    """Risk measures for one account as of a calculation date.

    Every metric is optional and defaults to None; only the account
    number and calculation date are required.
    """

    account_number: str
    calculation_date: datetime
    # Number of months of data used for the calculation.
    period_months: int = 12

    # --- Volatility ---------------------------------------------------
    # Standard deviation of returns, annualized.
    volatility: Optional[float] = None
    # Downside deviation.
    downside_volatility: Optional[float] = None

    # --- Market sensitivity -------------------------------------------
    # Beta versus the benchmark.
    beta: Optional[float] = None
    # Correlation to the benchmark.
    correlation: Optional[float] = None

    # --- Risk-adjusted returns ----------------------------------------
    # (Return - RiskFree) / Volatility
    sharpe_ratio: Optional[float] = None
    # (Return - RiskFree) / DownsideVol
    sortino_ratio: Optional[float] = None
    # (Return - RiskFree) / Beta
    treynor_ratio: Optional[float] = None

    # --- Drawdown -----------------------------------------------------
    # Largest peak-to-trough decline.
    max_drawdown: Optional[float] = None
    max_drawdown_duration_days: Optional[int] = None

    # --- Value at Risk (monthly) --------------------------------------
    # 95% VaR.
    var_95: Optional[float] = None
    # 99% VaR.
    var_99: Optional[float] = None
# ============================================================================
# Statement Container
# ============================================================================
class Statement(BaseModel):
    """Container for everything held about one statement.

    Bundles the account summary with the statement's holdings,
    transactions and benchmark comparisons, plus file bookkeeping.
    """

    # Generated unique ID for this statement.
    statement_id: str
    file_path: str
    institution: str
    # Filled with datetime.now() when omitted, i.e. a naive local timestamp.
    indexed_at: datetime = Field(default_factory=datetime.now)

    summary: AccountSummary

    # Collections default to empty lists.
    # NOTE(review): pydantic is documented to copy mutable defaults per
    # instance, so these literals should not be shared between models;
    # confirm for the pydantic version this project pins.
    holdings: list[Holding] = []
    transactions: list[Transaction] = []
    # Benchmark comparisons taken from the statement.
    benchmarks: list[Benchmark] = []

    # Archive paths; None when not set.
    pdf_path: Optional[str] = None
    json_path: Optional[str] = None
# ============================================================================
# Database Models
# ============================================================================
class StatementRecord(BaseModel):
    """Flat statement row for SQLite.

    Holds the statement's identifying and bookkeeping fields together
    with a balance and the performance returns, all as scalar columns.
    """

    statement_id: str
    institution: str
    account_number: str
    statement_date: datetime
    # Required here (no default).
    indexed_at: datetime
    file_path: str
    pdf_path: Optional[str] = None
    json_path: Optional[str] = None

    # Balance and performance returns, kept for time-series analysis.
    current_balance_cad: Optional[float] = None
    return_mtd: Optional[float] = None
    return_qtd: Optional[float] = None
    return_ytd: Optional[float] = None
    return_1y: Optional[float] = None
    return_3y: Optional[float] = None
    return_5y: Optional[float] = None
    return_since_inception: Optional[float] = None
class HoldingRecord(BaseModel):
    """Flat holding row for SQLite, linked to a statement by ``statement_id``.

    Carries a subset of the fields declared on ``Holding``; ``currency``
    is a plain string here rather than a restricted literal.
    """

    # Presumably the database row id, None until inserted - TODO confirm.
    id: Optional[int] = None
    statement_id: str

    # Position identity.
    symbol: str
    description: str
    quantity: float
    currency: str

    # Cost and valuation.
    cost_per_share: float
    total_cost: float
    market_price: float
    market_value_cad: float

    # Performance.
    profit_loss: float
    percent_return: float
    security_type: str = "ETF"
    annualized_return: Optional[float] = None
class TransactionRecord(BaseModel):
    """Flat transaction row for SQLite, linked to a statement by ``statement_id``.

    Has the same transaction fields as ``Transaction``, but the amount
    fields and ``currency`` carry no defaults here and ``currency`` is a
    plain string.
    """

    # Presumably the database row id, None until inserted - TODO confirm.
    id: Optional[int] = None
    statement_id: str

    transaction_date: datetime
    settle_date: datetime
    activity_type: str

    # Security details; left as None when not supplied.
    symbol: Optional[str] = None
    description: str
    quantity: Optional[float] = None
    price: Optional[float] = None

    # Monetary amounts; all required on the record.
    gross_amount: float
    commission: float
    net_amount: float
    currency: str
class BenchmarkRecord(BaseModel):
    """Flat benchmark row for SQLite, linked to a statement by ``statement_id``.

    NOTE(review): ``Benchmark`` also declares ``alpha_since_inception``,
    which has no counterpart here - confirm whether that is intentional.
    """

    # Presumably the database row id, None until inserted - TODO confirm.
    id: Optional[int] = None
    statement_id: str

    name: str
    symbol: Optional[str] = None

    # Benchmark returns per comparison period.
    return_mtd: Optional[float] = None
    return_qtd: Optional[float] = None
    return_ytd: Optional[float] = None
    return_1y: Optional[float] = None
    return_3y: Optional[float] = None
    return_5y: Optional[float] = None

    # Relative performance figures.
    alpha_ytd: Optional[float] = None
    alpha_1y: Optional[float] = None
class RiskMetricsRecord(BaseModel):
    """Flat risk-metrics row for SQLite (calculated from historical data).

    Keeps only a core subset of the metrics declared on ``RiskMetrics``.
    """

    # Presumably the database row id, None until inserted - TODO confirm.
    id: Optional[int] = None
    account_number: str
    calculation_date: datetime
    period_months: int = 12

    # Core metrics; each is None when not supplied.
    volatility: Optional[float] = None
    beta: Optional[float] = None
    sharpe_ratio: Optional[float] = None
    max_drawdown: Optional[float] = None
    var_95: Optional[float] = None
# ============================================================================
# Monte Carlo Simulation Models
# ============================================================================
class SimulationProjection(BaseModel):
    """Monte Carlo projection for one specific year of the simulation.

    All four fields are required.
    """

    year: int
    months: int
    # String-keyed float values; the key naming is not defined here.
    percentiles: dict[str, float]
    # String-keyed float values; the key naming is not defined here.
    statistics: dict[str, float]
class MonteCarloResults(BaseModel):
    """Full output of a Monte Carlo simulation run.

    All four fields are required.
    """

    # Untyped dict; its schema is not defined in this model.
    simulation_metadata: dict
    # Untyped dict; its schema is not defined in this model.
    input_statistics: dict
    # One SimulationProjection per entry.
    year_by_year_projections: list[SimulationProjection]
    # String-to-string mapping; presumably visualization name to a path
    # or encoded image - TODO confirm against the producer.
    visualizations: dict[str, str]
# ============================================================================
# Vector Store Models
# ============================================================================
class DocumentChunk(BaseModel):
    """A piece of statement text prepared for vector storage.

    Each chunk is tied back to its source statement by ``statement_id``.
    """

    chunk_id: str
    statement_id: str
    content: str
    # One of "summary", "holdings", "transactions", "full" (plain string,
    # not validated).
    chunk_type: str
    # Free-form extra data; defaults to an empty dict.
    metadata: dict = {}