forensics
Close the learning loop on failed agent runs by collecting failure data, recording replays, and annotating outcomes for continuous improvement.
Instructions
Failure dataset & replays — close the learning loop on failed agent runs.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
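| action | Yes | Operation to run: one of `collect`, `summarize`, `write-learnings`, `list-replays`, `record-replay`, `load-replay`, `annotate-replay`, `reflect`. | |
| projectRoot | No | Project root directory; the core helpers shown below fall back to the current working directory when it is omitted. | |
| replayId | No | For action=load-replay and action=annotate-replay: id of the replay to read or annotate. | |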
| payload | No | For action=record-replay: the Task() payload to store. | |
| outcome | No | For action=annotate-replay | |
| agent | No | For action=reflect: agent name (e.g. executor) | |
| dryRun | No | For action=reflect: only save the assembled prompt, no API call | |
Implementation Reference
- src/mcp-server/index.js:91-107 (schema) MCP tool schema/definition for the 'forensics' tool. Defines the tool name, description, and inputSchema with actions (collect, summarize, write-learnings, list-replays, record-replay, load-replay, annotate-replay, reflect) and parameters.
{ name: 'forensics', description: 'Failure dataset & replays — close the learning loop on failed agent runs.', inputSchema: { type: 'object', properties: { action: { type: 'string', enum: ['collect', 'summarize', 'write-learnings', 'list-replays', 'record-replay', 'load-replay', 'annotate-replay', 'reflect'] }, projectRoot: { type: 'string' }, replayId: { type: 'string' }, payload: { type: 'object', description: 'For action=record-replay: the Task() payload to store.' }, outcome: { type: 'object', description: 'For action=annotate-replay' }, agent: { type: 'string', description: 'For action=reflect: agent name (e.g. executor)' }, dryRun: { type: 'boolean', description: 'For action=reflect: only save the assembled prompt, no API call' }, }, required: ['action'], }, },
- src/mcp-server/index.js:213-235 (handler) The MCP handler function (handleForensics) that dispatches forensics actions to the core logic functions like collectFailures, summarizeByAgent, writeLearnings, listReplays, recordReplay, loadReplay, annotateReplay, and reflect.
/**
 * Dispatches a `forensics` MCP tool call to the matching core function.
 *
 * @param {object} [args] - Tool arguments. `action` selects the operation; the
 *   remaining fields (`projectRoot`, `replayId`, `payload`, `outcome`, `agent`,
 *   `dryRun`) are forwarded to the function that handles that action.
 * @returns {Promise<*>} Whatever the selected core function resolves to, or
 *   `{ error }` when `action` is not one of the known actions.
 */
async function handleForensics(args) {
  // Missing arguments fall through to the "Unknown action" result rather than a TypeError.
  const params = args ?? {};
  const { projectRoot } = params;

  switch (params.action) {
    case 'collect':
      return collectFailures({ projectRoot });

    case 'summarize': {
      const failures = await collectFailures({ projectRoot });
      return summarizeByAgent(failures);
    }

    case 'write-learnings': {
      const failures = await collectFailures({ projectRoot });
      return writeLearnings(failures, { projectRoot });
    }

    case 'list-replays':
      return listReplays({ projectRoot });

    case 'record-replay':
      return recordReplay(params.payload, { projectRoot });

    case 'load-replay':
      return loadReplay(params.replayId, { projectRoot });

    case 'annotate-replay':
      return annotateReplay(params.replayId, params.outcome, { projectRoot });

    case 'reflect':
      return reflect({
        agent: params.agent,
        projectRoot,
        dryRun: params.dryRun,
        // MCP never auto-applies
        apply: false,
        interactive: false,
      });

    default:
      return { error: `Unknown action: ${params.action}` };
  }
}

// - src/mcp-server/index.js:246-253 (registration) The HANDLERS map that registers 'forensics' -> handleForensics, so the CallToolRequestSchema dispatcher routes the forensics tool to its handler.
// Maps each MCP tool name to the function that handles calls to that tool.
const HANDLERS = {
  'kit': handleKit,
  'sync': handleSync,
  'reverse-sync': handleReverseSync,
  'gates': handleGates,
  'forensics': handleForensics,
  'install': handleInstall,
};

// - src/core/failures.js:14-33 (helper) Core helper collectFailures() - aggregates debug sessions, failed verifications, and forensics reports into a structured dataset. Used by the 'collect', 'summarize', and 'write-learnings' actions.
/**
 * Gathers failure records from the project's `.planning` directory.
 *
 * Three readers run concurrently: resolved debug sessions (`debug/resolved`),
 * failed verifications (`phases`) and forensics reports (`forensics`).
 *
 * @param {object} [opts]
 * @param {string} [opts.projectRoot] - Project root; defaults to `process.cwd()`.
 * @returns {Promise<{projectRoot: string, counts: {debug: number, verify: number, forensics: number}, items: Array}>}
 *   The resolved root, per-source counts, and all items concatenated in the
 *   order debug, verify, forensics.
 */
export async function collectFailures(opts = {}) {
  const projectRoot = path.resolve(opts.projectRoot ?? process.cwd());
  const planningDir = path.join(projectRoot, '.planning');

  // Start all three reads before awaiting so they overlap.
  const pendingReads = [
    readDebugSessions(path.join(planningDir, 'debug', 'resolved')),
    readFailedVerifications(path.join(planningDir, 'phases')),
    readForensics(path.join(planningDir, 'forensics')),
  ];
  const [debugFailures, verifyFailures, forensicsReports] = await Promise.all(pendingReads);

  const counts = {
    debug: debugFailures.length,
    verify: verifyFailures.length,
    forensics: forensicsReports.length,
  };
  const items = [...debugFailures, ...verifyFailures, ...forensicsReports];

  return { projectRoot, counts, items };
}

// - src/core/replays.js:20-65 (helper) Core helper functions (recordReplay, listReplays, loadReplay, annotateReplay) for managing replay data - used by the replay-related forensics actions.
/**
 * Builds the path of a replay file, rejecting ids that could point outside the
 * replay directory.
 *
 * @param {string} id - Replay id (the file name without `.json`).
 * @param {object} opts
 * @param {string} [opts.projectRoot] - Project root; defaults to `process.cwd()`.
 * @returns {string} Path of `<projectRoot>/<REPLAY_DIR_REL>/<id>.json`.
 * @throws {Error} If `id` is not a non-empty string or contains a path separator or NUL.
 */
function resolveReplayFile(id, opts) {
  // The id is interpolated into a file name, so separators would allow `../` traversal.
  if (typeof id !== 'string' || id === '' || /[\\/\u0000]/.test(id)) {
    throw new Error(`Invalid replay id: ${String(id)}`);
  }
  const projectRoot = path.resolve(opts.projectRoot ?? process.cwd());
  return path.join(projectRoot, REPLAY_DIR_REL, `${id}.json`);
}

/**
 * Stores a payload as a new replay record on disk.
 *
 * The id is `<timestamp>-<slug>`, where the slug joins the payload's `phase`,
 * `plan` and `agent` (or is `unknown` when none are set).
 *
 * @param {object} [payload] - Data to store; a missing payload is treated as empty.
 * @param {object} [opts]
 * @param {string} [opts.projectRoot] - Project root; defaults to `process.cwd()`.
 * @returns {Promise<{id: string, file: string, record: object}>} The generated id,
 *   the file written, and the stored record.
 */
export async function recordReplay(payload, opts = {}) {
  const projectRoot = path.resolve(opts.projectRoot ?? process.cwd());
  const dir = path.join(projectRoot, REPLAY_DIR_REL);
  await fs.mkdir(dir, { recursive: true });

  const data = payload ?? {};
  const recordedAt = new Date().toISOString();
  const ts = recordedAt.replace(/[:.]/g, '-');
  // Payload fields end up in the file name: neutralise separators and other
  // characters that are unsafe in file names so the file stays inside `dir`.
  const slug = ([data.phase, data.plan, data.agent].filter(Boolean).join('-') || 'unknown')
    .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '-');
  const id = `${ts}-${slug}`;
  const file = path.join(dir, `${id}.json`);

  // Generated fields are re-applied after the payload so a payload `id` or
  // `recorded_at` cannot make the stored record disagree with its file name.
  const stamp = { id, recorded_at: recordedAt };
  const record = { ...stamp, ...data, ...stamp };
  await fs.writeFile(file, JSON.stringify(record, null, 2), 'utf8');
  return { id, file, record };
}

/**
 * Lists stored replays, newest first.
 *
 * @param {object} [opts]
 * @param {string} [opts.projectRoot] - Project root; defaults to `process.cwd()`.
 * @returns {Promise<Array<{id: *, agent: *, phase: *, plan: *, recorded_at: *}>>}
 *   One summary per readable `.json` file, sorted by `recorded_at` descending;
 *   an empty array when the replay directory cannot be read.
 */
export async function listReplays(opts = {}) {
  const projectRoot = path.resolve(opts.projectRoot ?? process.cwd());
  const dir = path.join(projectRoot, REPLAY_DIR_REL);

  let entries;
  try {
    entries = await fs.readdir(dir);
  } catch {
    // Best effort: an unreadable or missing directory is reported as "no replays".
    return [];
  }

  const items = [];
  for (const entry of entries) {
    if (!entry.endsWith('.json')) continue;
    try {
      const r = JSON.parse(await fs.readFile(path.join(dir, entry), 'utf8'));
      items.push({ id: r.id, agent: r.agent, phase: r.phase, plan: r.plan, recorded_at: r.recorded_at });
    } catch {
      // Best effort: skip files that cannot be read or parsed instead of failing the listing.
    }
  }
  return items.sort((a, b) => (b.recorded_at ?? '').localeCompare(a.recorded_at ?? ''));
}

/**
 * Reads one replay record from disk.
 *
 * @param {string} id - Replay id.
 * @param {object} [opts]
 * @param {string} [opts.projectRoot] - Project root; defaults to `process.cwd()`.
 * @returns {Promise<object>} The parsed record.
 * @throws {Error} If the id is invalid, the file cannot be read, or it is not valid JSON.
 */
export async function loadReplay(id, opts = {}) {
  const file = resolveReplayFile(id, opts);
  const raw = await fs.readFile(file, 'utf8');
  return JSON.parse(raw);
}

/**
 * Merges an outcome into a stored replay and stamps it with `annotated_at`.
 *
 * @param {string} id - Replay id.
 * @param {object} [outcome] - Fields merged over any existing `outcome`.
 * @param {object} [opts]
 * @param {string} [opts.projectRoot] - Project root; defaults to `process.cwd()`.
 * @returns {Promise<object>} The updated record, as written back to disk.
 * @throws {Error} If the id is invalid, the file cannot be read, or it is not valid JSON.
 */
export async function annotateReplay(id, outcome, opts = {}) {
  const file = resolveReplayFile(id, opts);
  const r = JSON.parse(await fs.readFile(file, 'utf8'));
  r.outcome = { ...(r.outcome ?? {}), ...outcome, annotated_at: new Date().toISOString() };
  await fs.writeFile(file, JSON.stringify(r, null, 2), 'utf8');
  return r;
}