import { AgentStorage, AgentFSSchemaStorage } from './agentfs.ts';
/**
* Common shape for a Task execution to ensure consistent metrics.
*
* Every task runner in this file (T1, T2, T3) resolves to one of these, both
* on success and on failure, so results can be compared across conditions.
*/
export interface TaskResult {
/** Task identifier; the runners in this file use 'T1', 'T2' and 'T3'. */
task: string;
/** Storage condition under test; the runners copy this from `storage.name`. */
condition: string;
/** Elapsed time in milliseconds, measured as a difference of `performance.now()` readings. */
timeMs: number;
/** Number of simulated "help lookups" the runner counted for this condition. */
helpCount: number;
/** False when the runner caught an exception; true otherwise. */
success: boolean;
/** Optional payload: the runners store `String(error)` here on failure, and T3 stores its answer text. */
output?: any;
}
/**
 * T1: Plan->Work Seam.
 *
 * Persists a baseline plan, records a mocked execution of it in the audit
 * log, and writes the resulting source artifact.
 *
 * @param storage Storage condition under test. Instances of
 *   `AgentFSSchemaStorage` save the plan as a structured plan item; every
 *   other storage gets the plan as a pretty-printed JSON file and is charged
 *   one simulated help lookup.
 * @returns Metrics for the run. Any exception raised along the way is caught
 *   and reported as `success: false` with the stringified error in `output`.
 */
export async function runTaskT1(storage: AgentStorage): Promise<TaskResult> {
  const startedAt = performance.now();
  let helpCount = 0;

  try {
    // Step 1: the plan to persist (structured in C2, a plain file in C0/C1).
    const profilePlan = {
      kind: 'SlotFillingsPlanItem',
      description: 'Add User Profile',
      slots: [
        { name: 'ProfilePic', value: 'url' },
        { name: 'Bio', value: 'text' },
      ],
    };

    if (!(storage instanceof AgentFSSchemaStorage)) {
      // Unstructured storage: simulate the "how do I format this?" lookup
      // before hand-serializing the plan to a file.
      helpCount++;
      const serializedPlan = JSON.stringify(profilePlan, null, 2);
      await storage.write('plans/user_profile_v1.json', serializedPlan);
    } else {
      await storage.savePlanItem('SlotFillingsPlanItem', profilePlan);
    }

    // Step 2: "execute" the plan by recording the execution in the audit log.
    const executionEntry = {
      actor: 'User1',
      action: 'ExecutePlan',
      target: 'user_profile_v1',
      status: 'success',
    };
    await storage.log(executionEntry);

    // Step 3: emit the output artifact.
    await storage.write('src/user_profile.ts', 'class UserProfile { ... }');

    const elapsedMs = performance.now() - startedAt;
    return {
      task: 'T1',
      condition: storage.name,
      timeMs: elapsedMs,
      helpCount,
      success: true,
    };
  } catch (failure) {
    // Failures are reported as data, so the caller always gets a TaskResult.
    const elapsedMs = performance.now() - startedAt;
    return {
      task: 'T1',
      condition: storage.name,
      timeMs: elapsedMs,
      helpCount,
      success: false,
      output: String(failure),
    };
  }
}
/**
 * T2: Iteration.
 *
 * Modifies the existing plan by appending a 'Social' slot, then records the
 * change in the audit log.
 *
 * Prerequisite: the plan written by T1 must already exist in `storage`. If it
 * does not, the task is reported as failed and no audit entry is written,
 * rather than claiming an update that never happened.
 *
 * @param storage Storage condition under test. Instances of
 *   `AgentFSSchemaStorage` query the latest `SlotFillingsPlanItem`, append to
 *   it and save it again; every other storage reads
 *   `plans/user_profile_v1.json`, appends, and writes a manually versioned
 *   `plans/user_profile_v2.json`, at the cost of one simulated help lookup.
 * @returns Metrics for the run. Any exception (including a missing or
 *   malformed plan) is caught and reported as `success: false` with the
 *   stringified error in `output`.
 */
export async function runTaskT2(storage: AgentStorage): Promise<TaskResult> {
  const start = performance.now();
  let helpCount = 0;
  try {
    // Change request: add a 'Social' field to the plan.
    const socialSlot = { name: 'Social', value: 'link' };

    if (storage instanceof AgentFSSchemaStorage) {
      // Easy path: query existing items, append to the most recent, save.
      // A real app would fetch by ID; this is tailored for benchmark speed.
      const plans = await storage.queryPlan('SlotFillingsPlanItem');
      const latest = plans[plans.length - 1];
      if (!latest) {
        throw new Error('T2 prerequisite missing: no SlotFillingsPlanItem to update');
      }
      latest.slots.push(socialSlot);
      await storage.savePlanItem('SlotFillingsPlanItem', latest);
    } else {
      // Harder path: locate the file ("where is it?"), parse, modify, write.
      helpCount += 1;
      const content = await storage.read('plans/user_profile_v1.json');
      if (!content) {
        throw new Error('T2 prerequisite missing: plans/user_profile_v1.json not found or empty');
      }
      const plan = JSON.parse(content);
      // JSON.parse is unchecked; fail with a clear message instead of an
      // opaque TypeError when the file does not have the expected shape.
      if (!Array.isArray(plan?.slots)) {
        throw new Error("T2: plans/user_profile_v1.json has no 'slots' array");
      }
      plan.slots.push(socialSlot);
      // Manual versioning: the original file is left untouched.
      await storage.write('plans/user_profile_v2.json', JSON.stringify(plan, null, 2));
    }

    // Only reached when the plan was actually updated.
    await storage.log({
      actor: 'User1',
      action: 'UpdatePlan',
      target: 'user_profile',
      details: 'Added Social field'
    });

    const end = performance.now();
    return {
      task: 'T2',
      condition: storage.name,
      timeMs: end - start,
      helpCount,
      success: true
    };
  } catch (e) {
    return {
      task: 'T2',
      condition: storage.name,
      timeMs: performance.now() - start,
      helpCount,
      success: false,
      output: String(e)
    };
  }
}
/**
 * T3: Retrieval.
 *
 * Answers "Why did we add X?" by counting audit records that mention
 * `user_profile`.
 *
 * @param storage Storage condition under test.
 *   - `AgentFSSchemaStorage` (C2): queries the `fpf_audit_trail` table.
 *   - Storage whose `logs/audit.jsonl` read yields content (C0): counts the
 *     lines containing `user_profile`.
 *   - Otherwise, storage whose `name` contains `Blob` (C1): dumps `audit_log`
 *     and filters in memory, at the cost of one simulated help lookup.
 *   If none of these applies, the answer is left as an empty string.
 * @returns Metrics for the run with the answer text in `output`. Any
 *   exception is caught and reported as `success: false` with the stringified
 *   error in `output`.
 */
export async function runTaskT3(storage: AgentStorage): Promise<TaskResult> {
  // Minimal structural view of the database handle used below. Declared
  // locally so this function does not depend on a `Database` type that this
  // file never imports. NOTE(review): presumably a bun:sqlite-style handle;
  // confirm against the storage implementation.
  interface AuditDb {
    query(sql: string): { all(): unknown[] };
  }

  const start = performance.now();
  let helpCount = 0;
  try {
    // Query: "Find updates to user_profile"
    let answer = "";
    if (storage instanceof AgentFSSchemaStorage) {
      // C2: query the audit table directly.
      // Note: in a real implementation this would use FTS.
      // Reaches into the storage's internal `db` for speed in this benchmark.
      const db = (storage as any).db as AuditDb;
      const logs = db.query(`SELECT * FROM fpf_audit_trail WHERE target LIKE '%user_profile%'`).all();
      answer = `Found ${logs.length} records.`;
    } else {
      // C0/C1: grep the logs file when there is one.
      const logContent = await storage.read('logs/audit.jsonl');
      if (logContent) { // C0
        const lines = logContent.split('\n').filter(l => l.includes('user_profile'));
        answer = `Found ${lines.length} records.`;
      } else if (storage.name.includes('Blob')) { // C1
        // C1 also has an audit table, but a user who does not know the
        // schema would dump it and grep (simulated penalty).
        const db = (storage as any).db as AuditDb;
        helpCount += 1;
        const rows = db.query(`SELECT entry FROM audit_log`).all() as Array<{ entry?: unknown }>;
        // Skip rows whose entry is NULL or not text instead of letting one
        // bad row throw and fail the whole retrieval.
        const matches = rows.filter(
          row => row != null && typeof row.entry === 'string' && row.entry.includes('user_profile')
        );
        answer = `Found ${matches.length} records.`;
      }
    }
    const end = performance.now();
    return {
      task: 'T3',
      condition: storage.name,
      timeMs: end - start,
      helpCount,
      success: true,
      output: answer
    };
  } catch (e) {
    return {
      task: 'T3',
      condition: storage.name,
      timeMs: performance.now() - start,
      helpCount,
      success: false,
      output: String(e)
    };
  }
}