initialize.eval.ts•775 B
import { describeEval } from 'vitest-evals'
import { runTask } from '@repo/eval-tools/src/runTask'
import { checkFactuality } from '@repo/eval-tools/src/scorers'
import { eachModel } from '@repo/eval-tools/src/test-models'
import { initializeClient } from './utils'
eachModel('$modelName', ({ model }) => {
describeEval('Runs container initialize', {
data: async () => [
{
input: 'create and ping a container',
expected:
'The container_initialize tool was called and then the container_ping tool was called',
},
],
task: async (input) => {
const client = await initializeClient()
const { promptOutput } = await runTask(client, model, input)
return promptOutput
},
scorers: [checkFactuality],
threshold: 1,
timeout: 60000,
})
})