evaluate_workflow_result
Test n8n workflow payloads to identify issues and calculate evaluation scores for quality assurance.
Instructions
Run a single payload test and return evaluation score + issues.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
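| configPath | Yes | Path to the workflow test config file that is read before the run. | |
| payloadName | Yes | Name of the test payload to run; it must match the `name` of an entry in the config's `testPayloads`. | |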
Implementation Reference
- src/index.ts:196-202 (handler) Handler for the 'evaluate_workflow_result' tool: validates the inputs with Zod, reads the config, executes the test payload, runs the evaluation, and returns the run result together with its evaluation.
// Handler branch for the 'evaluate_workflow_result' tool: validate the arguments,
// load the config, run the named payload, score the run, and return both the raw
// run result and its evaluation as pretty-printed JSON text.
if (name === 'evaluate_workflow_result') {
  const argsSchema = z.object({ configPath: z.string(), payloadName: z.string() });
  const input = argsSchema.parse(args);

  const workflowConfig = readConfig(input.configPath);
  const runResult = await testPayload(workflowConfig, input.payloadName);

  // Key order (result first, then evaluation) is part of the emitted JSON text.
  const report = { result: runResult, evaluation: evaluateRun(workflowConfig, runResult) };
  return { content: [{ type: 'text', text: JSON.stringify(report, null, 2) }] };
}
// - src/evaluator.ts:12-73 (helper) Core evaluation logic: calculates tier1 and tier3 scores, checks HTTP status/timeout/output, applies tier3Checks, computes final score (70% tier1 + 30% tier3), and returns passed status with issues
/**
 * Scores one test run and lists every rule it broke.
 *
 * Tier 1 (starts at 100):
 *   - minus 50 when `result.ok` is false
 *   - minus 25 when `result.durationMs` exceeds `config.timeoutMs` (30000 if unset)
 *   - minus 25 when `result.output` is null/undefined or the empty string
 *
 * Tier 3 (starts at 100), one deduction per failed entry of `config.tier3Checks`:
 *   - minus 10 when the check's severity is `warning`, minus 20 otherwise
 *
 * Both tiers are clamped at 0 and combined as `round(0.7 * tier1 + 0.3 * tier3)`.
 *
 * @param config - Workflow test configuration (timeout, quality threshold, tier 3 checks).
 * @param result - Outcome of a single payload run.
 * @returns The combined score, both tier scores, the collected issues, and `passed`,
 *   which is true only when tier 1 is exactly 100, the score reaches
 *   `config.qualityThreshold` (85 if unset), and no issue has severity `error`.
 */
export function evaluateRun(config: WorkflowTestConfig, result: TestRunResult): EvaluationResult {
  const issues: EvaluationIssue[] = [];
  let tier1 = 100;
  let tier3 = 100;

  // Renders a field value as text for the string-based checks. Plain objects, and
  // arrays that contain objects, are serialised as JSON: String() would turn them
  // into "[object Object]", which makes `contains` and the length checks meaningless.
  const toText = (value: unknown): string => {
    if (value == null) return '';
    const isPlainObject =
      typeof value === 'object' && Object.getPrototypeOf(value) === Object.prototype;
    const isArrayOfObjects =
      Array.isArray(value) && value.some((item) => typeof item === 'object' && item !== null);
    if (!isPlainObject && !isArrayOfObjects) return String(value);
    try {
      return JSON.stringify(value);
    } catch {
      // Not serialisable (e.g. circular): fall back to the previous behaviour.
      return String(value);
    }
  };

  if (!result.ok) {
    tier1 -= 50;
    issues.push({
      tier: 'tier1',
      severity: 'error',
      check: 'http_ok',
      message: result.error || `HTTP ${result.status}`,
    });
  }

  // NOTE(review): testPayload falls back to the environment's default timeout when
  // config.timeoutMs is unset, whereas this check falls back to 30000 - confirm the
  // two defaults are meant to agree.
  if (result.durationMs > (config.timeoutMs ?? 30000)) {
    tier1 -= 25;
    issues.push({
      tier: 'tier1',
      severity: 'error',
      check: 'timeout',
      message: `Response exceeded timeout: ${result.durationMs}ms`,
    });
  }

  if (result.output == null || result.output === '') {
    tier1 -= 25;
    issues.push({ tier: 'tier1', severity: 'error', check: 'not_empty', message: 'Output is empty' });
  }

  for (const check of config.tier3Checks ?? []) {
    const actual = getField(result.output, check.field);
    let failed = false;
    let unknownType = false;

    switch (check.check) {
      case 'contains':
        failed = !toText(actual).includes(String(check.value ?? ''));
        break;
      case 'not_contains':
        failed = toText(actual).includes(String(check.value ?? ''));
        break;
      case 'min_length':
        failed = toText(actual).length < Number(check.value ?? 0);
        break;
      case 'max_length':
        failed = toText(actual).length > Number(check.value ?? Number.MAX_SAFE_INTEGER);
        break;
      case 'equals':
        failed = actual !== check.value;
        break;
      case 'not_empty':
        failed = actual == null || String(actual) === '';
        break;
      default:
        // A mistyped or unsupported check type must not count as a pass: the rule
        // was never evaluated, so report it instead of silently skipping it.
        failed = true;
        unknownType = true;
        break;
    }

    if (failed) {
      tier3 -= check.severity === 'warning' ? 10 : 20;
      issues.push({
        tier: 'tier3',
        severity: check.severity ?? 'error',
        check: check.name,
        // The configured message describes a content failure, so it is not used
        // when the check itself could not be evaluated.
        message: unknownType
          ? `Unknown check type "${String(check.check)}" on field ${check.field}`
          : (check.message ?? `Check failed on field ${check.field}`),
      });
    }
  }

  tier1 = Math.max(0, tier1);
  tier3 = Math.max(0, tier3);

  const score = Math.round(tier1 * 0.7 + tier3 * 0.3);
  const passed =
    tier1 === 100 &&
    score >= (config.qualityThreshold ?? 85) &&
    !issues.some((i) => i.severity === 'error');

  return { passed, score, tier1Score: tier1, tier3Score: tier3, issues };
}
// - src/index.ts:22-30 (registration) Tool registration defining 'evaluate_workflow_result' with description and input schema requiring configPath and payloadName parameters
{ name: 'evaluate_workflow_result', description: 'Run a single payload test and return evaluation score + issues.', inputSchema: { type: 'object', properties: { configPath: { type: 'string' }, payloadName: { type: 'string' } }, required: ['configPath', 'payloadName'], }, }, - src/types.ts:38-51 (schema)Type definitions for evaluation: EvaluationIssue (tier, severity, check, message) and EvaluationResult (passed, score, tier1Score, tier3Score, issues)
/** One rule violation recorded while evaluating a test run. */
export interface EvaluationIssue {
  /** Scoring tier the violated rule belongs to. */
  tier: 'tier1' | 'tier3';
  /** Severity attached to the violation. */
  severity: 'error' | 'warning';
  /** Identifier of the rule that failed. */
  check: string;
  /** Human-readable description of the failure. */
  message: string;
}

/** Verdict for one evaluated test run. */
export interface EvaluationResult {
  /** Whether the run met the pass criteria. */
  passed: boolean;
  /** Combined score for the run. */
  score: number;
  /** Tier 1 score. */
  tier1Score: number;
  /** Tier 3 score. */
  tier3Score: number;
  /** Every violation recorded during evaluation. */
  issues: Array<EvaluationIssue>;
}
// - src/n8n-client.ts:22-74 (helper) testPayload function: finds payload by name, executes webhook or workflow execute API call with timeout handling, returns TestRunResult with status/duration/output
/**
 * Sends one named test payload to the configured n8n workflow and reports the outcome.
 *
 * In `webhook` trigger mode the payload data is POSTed as JSON to `config.webhookPath`,
 * which is used as-is when it is an absolute http(s) URL and is otherwise joined onto
 * the base URL from `getEnv()`. In any other mode the data is POSTed as `{ inputData }`
 * to `/api/v1/workflows/{workflowId}/execute` with the headers from `buildHeaders()`.
 * The request is aborted after `config.timeoutMs`, or the environment default when
 * that is unset.
 *
 * @param config - Workflow test configuration holding the payloads and trigger settings.
 * @param payloadName - Name of the entry in `config.testPayloads` to send.
 * @returns The run result. Anything that goes wrong after the payload lookup (missing
 *   `webhookPath`/`workflowId`, network failure, abort) is reported as
 *   `ok: false`, `status: 0`, `output: null` with the error message, not thrown.
 * @throws Error when no payload named `payloadName` exists in the config.
 */
export async function testPayload(config: WorkflowTestConfig, payloadName: string): Promise<TestRunResult> {
  const payload = config.testPayloads.find((p) => p.name === payloadName);
  if (!payload) throw new Error(`Payload not found: ${payloadName}`);

  const { baseUrl, defaultTimeoutMs } = getEnv();
  const timeoutMs = config.timeoutMs ?? defaultTimeoutMs;

  const start = Date.now();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  try {
    // Strip trailing slashes so joined URLs never contain "//" after the host.
    const apiBase = baseUrl.replace(/\/+$/, '');

    let response: Response;
    if (config.triggerMode === 'webhook') {
      if (!config.webhookPath) throw new Error('webhookPath is required in webhook mode');
      // Only a real http(s) scheme marks an absolute URL; a relative path that merely
      // begins with the letters "http" (e.g. "http-hook") must still be joined.
      const isAbsolute = /^https?:\/\//i.test(config.webhookPath);
      const url = isAbsolute
        ? config.webhookPath
        : `${apiBase}${config.webhookPath.startsWith('/') ? '' : '/'}${config.webhookPath}`;
      response = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload.data),
        signal: controller.signal,
      });
    } else {
      if (!config.workflowId) throw new Error('workflowId is required in execute mode');
      response = await fetch(`${apiBase}/api/v1/workflows/${config.workflowId}/execute`, {
        method: 'POST',
        headers: buildHeaders(),
        body: JSON.stringify({ inputData: payload.data }),
        signal: controller.signal,
      });
    }

    // Read the body before taking the duration so the time spent downloading the
    // response is included in the measurement.
    const output = await safeJson(response);
    return {
      payloadName,
      ok: response.ok,
      status: response.status,
      durationMs: Date.now() - start,
      output,
    };
  } catch (error) {
    return {
      payloadName,
      ok: false,
      status: 0,
      durationMs: Date.now() - start,
      output: null,
      error: error instanceof Error ? error.message : String(error),
    };
  } finally {
    // Always cancel the abort timer so it cannot fire after the call has settled.
    clearTimeout(timeout);
  }
}