Skip to main content
Glama

evaluate_workflow_result

Test n8n workflow payloads to identify issues and calculate evaluation scores for quality assurance.

Instructions

Run a single payload test and return evaluation score + issues.

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| configPath | Yes | | |
| payloadName | Yes | | |

Implementation Reference

  • Handler for the 'evaluate_workflow_result' tool: validates the inputs with Zod, reads the config, runs the named test payload, evaluates the run, and returns the run result together with its evaluation as JSON text
    // Tool 'evaluate_workflow_result': run one named payload from the config and report its evaluation.
    if (name === 'evaluate_workflow_result') {
      // Validate the raw tool arguments; parse() throws when they do not match the schema.
      const argsSchema = z.object({ configPath: z.string(), payloadName: z.string() });
      const input = argsSchema.parse(args);

      const config = readConfig(input.configPath);
      const result = await testPayload(config, input.payloadName);
      const report = { result, evaluation: evaluateRun(config, result) };

      // Reply with the run result and its evaluation as pretty-printed JSON text.
      return { content: [{ type: 'text', text: JSON.stringify(report, null, 2) }] };
    }
  • Core evaluation logic: calculates tier1 and tier3 scores, checks HTTP status/timeout/output, applies tier3Checks, computes final score (70% tier1 + 30% tier3), and returns passed status with issues
    /**
     * Scores a single test run against the workflow's test configuration.
     *
     * Tier 1 starts at 100 and loses 50 for a non-ok response, 25 when the run took
     * longer than `config.timeoutMs` (30000 ms if unset) and 25 for an empty output.
     * Tier 3 starts at 100 and loses 10 per failed `warning` check and 20 per failed
     * check of any other severity. Both tiers are floored at 0 and the final score is
     * `round(tier1 * 0.7 + tier3 * 0.3)`.
     *
     * @param config - Test configuration; reads `timeoutMs`, `tier3Checks` and `qualityThreshold`.
     * @param result - Outcome of the run; reads `ok`, `error`, `status`, `durationMs` and `output`.
     * @returns The verdict, the combined and per-tier scores, and every issue found.
     *   `passed` needs a perfect tier 1, a score of at least `qualityThreshold`
     *   (85 if unset) and no issue with severity `error`.
     */
    export function evaluateRun(config: WorkflowTestConfig, result: TestRunResult): EvaluationResult {
      const issues: EvaluationIssue[] = [];
      let tier1 = 100;
      let tier3 = 100;

      // Records a tier-1 failure: deducts the penalty and logs an error-level issue.
      const failTier1 = (penalty: number, check: string, message: string): void => {
        tier1 -= penalty;
        issues.push({ tier: 'tier1', severity: 'error', check, message });
      };

      if (!result.ok) {
        failTier1(50, 'http_ok', result.error || `HTTP ${result.status}`);
      }

      const allowedMs = config.timeoutMs ?? 30000;
      if (result.durationMs > allowedMs) {
        failTier1(25, 'timeout', `Response exceeded timeout: ${result.durationMs}ms`);
      }

      const outputIsEmpty = result.output == null || result.output === '';
      if (outputIsEmpty) {
        failTier1(25, 'not_empty', 'Output is empty');
      }

      for (const rule of config.tier3Checks ?? []) {
        const actual = getField(result.output, rule.field);
        // Stringified lazily so that only the branch that needs text converts anything.
        const actualText = (): string => String(actual ?? '');
        const expectedText = (): string => String(rule.value ?? '');

        const kind = rule.check;
        let failed = false;
        if (kind === 'contains') {
          failed = !actualText().includes(expectedText());
        } else if (kind === 'not_contains') {
          failed = actualText().includes(expectedText());
        } else if (kind === 'min_length') {
          failed = actualText().length < Number(rule.value ?? 0);
        } else if (kind === 'max_length') {
          failed = actualText().length > Number(rule.value ?? Number.MAX_SAFE_INTEGER);
        } else if (kind === 'equals') {
          failed = actual !== rule.value;
        } else if (kind === 'not_empty') {
          failed = actual == null || String(actual) === '';
        }
        // Any other check kind leaves `failed` false, i.e. it counts as passing.

        if (!failed) continue;

        tier3 -= rule.severity === 'warning' ? 10 : 20;
        issues.push({
          tier: 'tier3',
          severity: rule.severity ?? 'error',
          check: rule.name,
          message: rule.message ?? `Check failed on field ${rule.field}`,
        });
      }

      // Floor both tiers at zero before weighting them 70/30.
      const tier1Score = Math.max(0, tier1);
      const tier3Score = Math.max(0, tier3);
      const score = Math.round((tier1Score * 0.7) + (tier3Score * 0.3));

      const threshold = config.qualityThreshold ?? 85;
      const noErrors = issues.every((issue) => issue.severity !== 'error');
      const passed = tier1Score === 100 && score >= threshold && noErrors;

      return { passed, score, tier1Score, tier3Score, issues };
    }
  • src/index.ts:22-30 (registration)
    Tool registration defining 'evaluate_workflow_result' with description and input schema requiring configPath and payloadName parameters
    // Registration entry for the 'evaluate_workflow_result' tool.
    {
      name: 'evaluate_workflow_result',
      description: 'Run a single payload test and return evaluation score + issues.',
      // JSON Schema for the tool arguments: both properties are strings and both are required.
      inputSchema: {
        type: 'object',
        properties: { configPath: { type: 'string' }, payloadName: { type: 'string' } },
        required: ['configPath', 'payloadName'],
      },
    },
  • Type definitions for evaluation: EvaluationIssue (tier, severity, check, message) and EvaluationResult (passed, score, tier1Score, tier3Score, issues)
    /** One problem found while evaluating a test run. */
    export interface EvaluationIssue {
      /** Scoring tier the issue counts against. */
      tier: 'tier1' | 'tier3';
      /** How serious the issue is. */
      severity: 'error' | 'warning';
      /** Identifier of the check that failed. */
      check: string;
      /** Human-readable description of the failure. */
      message: string;
    }
    
    /** Outcome of evaluating a single test run. */
    export interface EvaluationResult {
      /** Overall verdict for the run. */
      passed: boolean;
      /** Combined score for the run. */
      score: number;
      /** Score of the tier-1 checks. */
      tier1Score: number;
      /** Score of the tier-3 checks. */
      tier3Score: number;
      /** Every issue recorded during the evaluation. */
      issues: EvaluationIssue[];
    }
  • testPayload function: finds payload by name, executes webhook or workflow execute API call with timeout handling, returns TestRunResult with status/duration/output
    /**
     * Sends one named test payload to the workflow and reports how the call went.
     *
     * In `webhook` trigger mode the payload data is POSTed to `config.webhookPath`,
     * which is used as-is when it is an absolute `http://` / `https://` URL and is
     * otherwise appended to the base URL from `getEnv()`. In any other mode the data
     * is POSTed as `{ inputData }` to `/api/v1/workflows/{workflowId}/execute` with the
     * headers from `buildHeaders()`. The request is aborted after `config.timeoutMs`,
     * falling back to the environment's `defaultTimeoutMs`.
     *
     * @param config - Workflow test configuration; reads `testPayloads`, `timeoutMs`,
     *   `triggerMode`, `webhookPath` and `workflowId`.
     * @param payloadName - Name of the entry in `config.testPayloads` to send.
     * @returns The run result. Request failures, aborts and a missing
     *   `webhookPath`/`workflowId` are reported as `ok: false`, `status: 0` with
     *   `error` set rather than thrown. `durationMs` covers the whole exchange,
     *   including reading the response body.
     * @throws Error when no payload named `payloadName` exists in the config.
     */
    export async function testPayload(config: WorkflowTestConfig, payloadName: string): Promise<TestRunResult> {
      const payload = config.testPayloads.find((p) => p.name === payloadName);
      if (!payload) throw new Error(`Payload not found: ${payloadName}`);

      const { baseUrl, defaultTimeoutMs } = getEnv();
      const timeoutMs = config.timeoutMs ?? defaultTimeoutMs;
      const start = Date.now();
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), timeoutMs);

      try {
        // Strip trailing slashes so joining with a path never produces "//".
        const root = String(baseUrl).replace(/\/+$/, '');

        let response: Response;
        if (config.triggerMode === 'webhook') {
          if (!config.webhookPath) throw new Error('webhookPath is required in webhook mode');
          // Only a real http(s) scheme marks an absolute URL; a relative path that merely
          // begins with the letters "http" must still be joined to the base URL.
          const isAbsolute = /^https?:\/\//i.test(config.webhookPath);
          const url = isAbsolute
            ? config.webhookPath
            : `${root}${config.webhookPath.startsWith('/') ? '' : '/'}${config.webhookPath}`;
          response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload.data),
            signal: controller.signal,
          });
        } else {
          if (!config.workflowId) throw new Error('workflowId is required in execute mode');
          response = await fetch(`${root}/api/v1/workflows/${config.workflowId}/execute`, {
            method: 'POST',
            headers: buildHeaders(),
            body: JSON.stringify({ inputData: payload.data }),
            signal: controller.signal,
          });
        }

        // Read the body before taking the end timestamp: fetch() resolves once the
        // response headers are available, so stopping the clock earlier would leave
        // the body transfer out of the reported duration.
        const output = await safeJson(response);

        return {
          payloadName,
          ok: response.ok,
          status: response.status,
          durationMs: Date.now() - start,
          output,
        };
      } catch (error) {
        // Network errors, aborts and configuration errors all become a failed result.
        return {
          payloadName,
          ok: false,
          status: 0,
          durationMs: Date.now() - start,
          output: null,
          error: error instanceof Error ? error.message : String(error),
        };
      } finally {
        // Always cancel the abort timer so it cannot fire after the run has settled.
        clearTimeout(timeout);
      }
    }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Souzix76/n8n-workflow-tester-safe'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.