/**
* MCP Tool: backtest_forecast_accuracy
*
* Walk-forward evaluation of forecast accuracy for GBM and Local Vol models.
*/
import { z } from "zod";
import {
simulateTerminalPrices,
simulateWithLocalVol,
computeVolSurface,
computeHistoricalVol,
evaluateForecast,
aggregateForecastEvaluations,
daysToYears,
daysBetween,
type ForecastEvaluation,
type ForecastBacktestResult,
} from "@quant-companion/core";
import { getDefaultProvider } from "../marketData";
/** Names of the forecast models the backtest knows how to run. */
const forecastModelSchema = z.enum(["gbm", "local_vol"]);

/** Builds an integer schema restricted to the inclusive range `[min, max]`. */
const boundedInt = (min: number, max: number) => z.number().int().min(min).max(max);

/**
 * Validation schema for the arguments of the `backtest_forecast_accuracy` tool.
 *
 * `models`, `stepDays` and `paths` may be omitted by the caller; the schema fills in
 * their defaults during parsing.
 */
export const backtestForecastAccuracySchema = z.object({
  symbol: z.string().describe("Stock/ETF ticker symbol"),
  horizonDays: boundedInt(1, 60).describe("Forecast horizon in days"),
  models: z
    .array(forecastModelSchema)
    .optional()
    .default(["gbm", "local_vol"])
    .describe("Models to evaluate"),
  startDate: z.string().describe("Start date (ISO format YYYY-MM-DD)"),
  endDate: z.string().describe("End date (ISO format YYYY-MM-DD)"),
  stepDays: boundedInt(1, 30)
    .optional()
    .default(7)
    .describe("Days between evaluation points (default: 7)"),
  paths: boundedInt(5000, 50000)
    .optional()
    .default(20000)
    .describe("Simulation paths per forecast"),
});
/**
 * Parsed input of the `backtest_forecast_accuracy` tool, inferred from
 * `backtestForecastAccuracySchema`. Because this is the schema's output type,
 * the fields that carry defaults (`models`, `stepDays`, `paths`) are always present.
 */
export type BacktestForecastAccuracyInput = z.infer<typeof backtestForecastAccuracySchema>;
/**
 * Result returned by `backtestForecastAccuracy`.
 */
export interface BacktestForecastAccuracyOutput {
/** Ticker symbol, upper-cased by the handler. */
symbol: string;
/** Forecast horizon in days, echoed from the input. */
horizonDays: number;
/** Backtest start date string, echoed from the input unchanged. */
startDate: string;
/** Backtest end date string, echoed from the input unchanged. */
endDate: string;
/** Days between evaluation points, echoed from the (defaulted) input. */
stepDays: number;
/**
 * One aggregated entry per requested model that produced at least one evaluation.
 * May be empty when no evaluation point had usable data.
 */
results: ForecastBacktestResult[];
/** Cross-model comparison derived from `results`. */
comparison: {
/** Model with the lowest `mae`, or `"none"` when `results` is empty. */
bestModelByMAE: string;
/** Model whose `coverage95` is closest to 0.95, or `"none"` when `results` is empty. */
bestModelByCoverage: string;
/** Human-readable one-line recap of the period count and the two winners. */
summary: string;
};
}
/**
 * MCP tool descriptor for `backtest_forecast_accuracy`.
 *
 * `inputSchema` is the JSON Schema advertised to clients. It is written by hand, so its
 * types, bounds and defaults must be kept in step with `backtestForecastAccuracySchema`,
 * which performs the actual validation: the numeric arguments are integers with the
 * inclusive ranges listed below, and `models`, `stepDays` and `paths` are optional
 * with defaults.
 */
export const backtestForecastAccuracyDefinition = {
  name: "backtest_forecast_accuracy",
  description: `Walk-forward backtest of forecast accuracy for GBM and Local Vol models.
Evaluates how well each model's forecast distribution matches realized prices over historical periods.
Metrics computed:
- MAE: Mean Absolute Error
- MRE: Mean Relative Error
- RMSE: Root Mean Square Error
- Coverage95: How often realized price falls in 95% CI (should be ~95%)
- AvgRealizedPercentile: Where realized prices fall in distribution (should be ~50%)
Use this to understand which model has better forecast accuracy for a given symbol.`,
  inputSchema: {
    type: "object" as const,
    properties: {
      symbol: { type: "string", description: "Stock/ETF ticker symbol" },
      horizonDays: {
        type: "integer",
        minimum: 1,
        maximum: 60,
        description: "Forecast horizon in days (1-60)",
      },
      models: {
        type: "array",
        items: { type: "string", enum: ["gbm", "local_vol"] },
        default: ["gbm", "local_vol"],
        description: "Models to evaluate (default: both)",
      },
      startDate: { type: "string", description: "Start date (YYYY-MM-DD)" },
      endDate: { type: "string", description: "End date (YYYY-MM-DD)" },
      stepDays: {
        type: "integer",
        minimum: 1,
        maximum: 30,
        default: 7,
        description: "Days between evaluations (1-30, default: 7)",
      },
      paths: {
        type: "integer",
        minimum: 5000,
        maximum: 50000,
        default: 20000,
        description: "Simulation paths (5000-50000, default: 20000)",
      },
    },
    required: ["symbol", "horizonDays", "startDate", "endDate"],
  },
};
/** Calendar days of history requested before the start date to seed the volatility estimate. */
const HISTORY_BUFFER_DAYS = 90;
/** Calendar days requested past `endDate + horizonDays` so late realized dates still have data. */
const REALIZED_BUFFER_DAYS = 10;
/** Minimum number of daily bars the provider must return for the backtest to run at all. */
const MIN_TOTAL_BARS = 60;
/** Minimum number of closes on or before an as-of date required to attempt a forecast. */
const MIN_HISTORY_BARS = 30;
/** Number of most recent closes handed to `computeHistoricalVol` at each evaluation point. */
const VOL_LOOKBACK_BARS = 60;
/** Window argument passed to `computeHistoricalVol`. */
const VOL_WINDOW = 30;
/**
 * How many calendar days past the nominal realized date to look for a close when that
 * date has none (weekend, holiday). Kept below `REALIZED_BUFFER_DAYS` so the fetched
 * data always covers the search.
 */
const MAX_REALIZED_ROLL_DAYS = 5;
/**
 * Volatility multiplier applied per model. `local_vol` is only a proxy here: it runs the
 * same terminal-price simulation as `gbm` with volatility scaled by 1.05 as a crude skew
 * adjustment, because no historical options chain is available at each as-of date.
 */
const MODEL_VOL_MULTIPLIER = { gbm: 1, local_vol: 1.05 } as const;
type BacktestModel = keyof typeof MODEL_VOL_MULTIPLIER;
/** Fixed evaluation/reporting order of the models, independent of the order in the input. */
const BACKTEST_MODELS: readonly BacktestModel[] = ["gbm", "local_vol"];

/** Formats a date as its UTC calendar day, `YYYY-MM-DD`. */
function toUtcDateKey(date: Date): string {
  return date.toISOString().slice(0, 10);
}

/** Returns a copy of `date` shifted by `days` UTC calendar days (the input is not mutated). */
function addUtcDays(date: Date, days: number): Date {
  const shifted = new Date(date.getTime());
  shifted.setUTCDate(shifted.getUTCDate() + days);
  return shifted;
}

/** True when `price` is present, finite and strictly positive. */
function isUsablePrice(price: number | undefined): price is number {
  return price !== undefined && Number.isFinite(price) && price > 0;
}

/**
 * Finds the close used as the realized price for a forecast made at `asOf`: the close on
 * `asOf + horizonDays`, or on the first later day (up to `MAX_REALIZED_ROLL_DAYS`) that has one.
 * Returns the price together with the calendar-day horizon actually spanned.
 */
function findRealizedPrice(
  priceByDate: ReadonlyMap<string, number>,
  asOf: Date,
  horizonDays: number
): { price: number; horizonDays: number } | undefined {
  for (let roll = 0; roll <= MAX_REALIZED_ROLL_DAYS; roll += 1) {
    const actualHorizon = horizonDays + roll;
    const price = priceByDate.get(toUtcDateKey(addUtcDays(asOf, actualHorizon)));
    if (isUsablePrice(price)) {
      return { price, horizonDays: actualHorizon };
    }
  }
  return undefined;
}

/** Returns the `model` of the entry with the lowest score (first wins ties), or `"none"` if empty. */
function pickBestModel<T extends { model: string }>(
  results: readonly T[],
  score: (result: T) => number
): string {
  let best: T | undefined;
  let bestScore = Number.POSITIVE_INFINITY;
  for (const result of results) {
    const value = score(result);
    if (best === undefined || value < bestScore) {
      best = result;
      bestScore = value;
    }
  }
  return best === undefined ? "none" : best.model;
}

/**
 * Walk-forward backtest of forecast accuracy for the requested models.
 *
 * Starting at `startDate` (moved to Monday if it falls on a weekend) and stepping by
 * `stepDays` until `endDate`, each evaluation point:
 *  1. takes the close on the as-of day as spot (points without a close are skipped),
 *  2. takes the close `horizonDays` later as the realized price, rolling forward to the
 *     next available close when that day has none, and forecasts over the horizon
 *     actually spanned,
 *  3. estimates volatility from the most recent closes up to and including the as-of day,
 *  4. simulates terminal prices per model and scores them against the realized price.
 *
 * All calendar arithmetic is done in UTC so that it agrees with the UTC date keys used to
 * look prices up; date-only ISO strings are parsed by `Date` as UTC midnight.
 *
 * The risk-free rate is fixed at 4.5% and the dividend yield at 0. A simulation or
 * evaluation that throws for a given point/model is skipped (best effort) rather than
 * aborting the whole backtest.
 *
 * @param input Parsed tool input (defaults already applied by the schema).
 * @returns Per-model aggregated metrics plus a comparison; `results` is empty and both
 *   winners are `"none"` when no evaluation point had usable data.
 * @throws Error if `startDate` or `endDate` cannot be parsed, or if the provider returns
 *   fewer than 60 daily bars. Errors raised by the data provider propagate unchanged.
 */
export async function backtestForecastAccuracy(
  input: BacktestForecastAccuracyInput
): Promise<BacktestForecastAccuracyOutput> {
  const provider = getDefaultProvider();
  const symbol = input.symbol.toUpperCase();
  const paths = input.paths;

  const startDate = new Date(input.startDate);
  const endDate = new Date(input.endDate);
  if (isNaN(startDate.getTime())) {
    throw new Error(`Invalid start date: ${input.startDate}`);
  }
  if (isNaN(endDate.getTime())) {
    throw new Error(`Invalid end date: ${input.endDate}`);
  }

  // Fetch everything once: history before the start for the vol estimate, and enough
  // data after the end to observe realized prices for the last forecasts.
  const allData = await provider.getHistoricalOHLCV({
    symbol,
    start: addUtcDays(startDate, -HISTORY_BUFFER_DAYS),
    end: addUtcDays(endDate, input.horizonDays + REALIZED_BUFFER_DAYS),
    interval: "1d",
  });
  if (allData.length < MIN_TOTAL_BARS) {
    throw new Error(`Insufficient historical data for ${symbol}`);
  }

  // Normalize to chronologically sorted (dateKey, close) pairs. Sorting a copy means the
  // "most recent N closes" slice below does not depend on the provider's ordering.
  const bars = allData
    .map((bar) => {
      const time = new Date(bar.timestamp).getTime();
      return { time, dateKey: toUtcDateKey(new Date(time)), close: bar.close };
    })
    .sort((a, b) => a.time - b.time);
  const closes = bars.map((bar) => bar.close);
  const priceByDate = new Map<string, number>();
  for (const bar of bars) {
    priceByDate.set(bar.dateKey, bar.close);
  }

  // Simplified market assumptions.
  const rate = 0.045;
  const dividendYield = 0;

  const activeModels = BACKTEST_MODELS.filter((model) => input.models.includes(model));
  const evaluationsByModel: Record<BacktestModel, ForecastEvaluation[]> = {
    gbm: [],
    local_vol: [],
  };

  // Skip to the first weekday if starting on a weekend (UTC, matching the date keys).
  let asOf = new Date(startDate.getTime());
  while (asOf.getUTCDay() === 0 || asOf.getUTCDay() === 6) {
    asOf = addUtcDays(asOf, 1);
  }

  // Exclusive end index into `bars`/`closes` of the history visible at the current as-of
  // date. As-of dates only move forward, so this cursor only ever advances.
  let historyEnd = 0;

  for (; asOf.getTime() <= endDate.getTime(); asOf = addUtcDays(asOf, input.stepDays)) {
    const asOfKey = toUtcDateKey(asOf);

    const spot = priceByDate.get(asOfKey);
    if (!isUsablePrice(spot)) {
      continue; // not a trading day (or no usable close)
    }

    const realized = findRealizedPrice(priceByDate, asOf, input.horizonDays);
    if (realized === undefined) {
      continue; // outcome not observable in the fetched data
    }

    // Include every bar dated on or before the as-of day. Comparing date keys (rather than
    // raw timestamps) keeps this consistent with the spot lookup above, so the as-of close
    // used as spot is also part of the volatility window.
    let next = bars[historyEnd];
    while (next !== undefined && next.dateKey <= asOfKey) {
      historyEnd += 1;
      next = bars[historyEnd];
    }
    if (historyEnd < MIN_HISTORY_BARS) {
      continue;
    }

    const volWindow = closes.slice(Math.max(0, historyEnd - VOL_LOOKBACK_BARS), historyEnd);
    const historicalVol = computeHistoricalVol(volWindow, VOL_WINDOW).volatility;
    if (!Number.isFinite(historicalVol)) {
      continue; // a NaN/infinite vol would poison every aggregated metric
    }

    // Forecast over the horizon actually spanned (equals input.horizonDays unless the
    // realized date had to be rolled forward past a non-trading day).
    const horizonYears = daysToYears(realized.horizonDays);

    for (const model of activeModels) {
      try {
        const terminalPrices = simulateTerminalPrices({
          spot,
          rate,
          vol: historicalVol * MODEL_VOL_MULTIPLIER[model],
          timeToMaturity: horizonYears,
          dividendYield,
          paths,
        });
        evaluationsByModel[model].push(evaluateForecast(terminalPrices, realized.price));
      } catch {
        // Best effort: a failed simulation/evaluation drops this point for this model only.
      }
    }
  }

  // Aggregate per model, keeping only models that produced at least one evaluation.
  const results: ForecastBacktestResult[] = [];
  for (const model of activeModels) {
    const evaluations = evaluationsByModel[model];
    if (evaluations.length > 0) {
      results.push(aggregateForecastEvaluations(evaluations, model));
    }
  }

  // Compare models: lowest MAE, and coverage closest to the nominal 95%.
  const bestModelByMAE = pickBestModel(results, (r) => r.mae);
  const bestModelByCoverage = pickBestModel(results, (r) => Math.abs(r.coverage95 - 0.95));

  const totalPeriods = results.reduce((sum, r) => sum + r.periods, 0);
  const summary =
    `Evaluated ${totalPeriods} forecast periods across ${results.length} models. ` +
    `Best MAE: ${bestModelByMAE}. Best calibrated (coverage): ${bestModelByCoverage}.`;

  return {
    symbol,
    horizonDays: input.horizonDays,
    startDate: input.startDate,
    endDate: input.endDate,
    stepDays: input.stepDays,
    results,
    comparison: {
      bestModelByMAE,
      bestModelByCoverage,
      summary,
    },
  };
}