actors-mcp-server

Official

Overview Schema Related Servers Score Discussions

output-formatter.ts•4.97 KiB

/**
 * Output formatter for evaluation results
 */

import type { WorkflowTestCase } from './test-cases-loader.js';
import type { ConversationHistory } from './types.js';
import type { JudgeResult } from './workflow-judge.js';

/**
 * Single evaluation result
 */
export type EvaluationResult = {
    /** Test case that was evaluated */
    testCase: WorkflowTestCase;
    /** Recorded conversation; not read by the formatters when `error` is set */
    conversation: ConversationHistory;
    /** Judge output; not read by the formatters when `error` is set */
    judgeResult: JudgeResult;
    /** Duration, printed by the formatters with an `ms` suffix */
    durationMs: number;
    /** When set (non-empty), the result is reported as an ERROR */
    error?: string;
}

/** Outcome bucket used for both the per-result label and the summary counts. */
type ResultStatus = 'PASS' | 'FAIL' | 'ERROR';

/** Label printed at the start of each result line in the table. */
const STATUS_LABELS: Record<ResultStatus, string> = {
    ERROR: '🔥 ERROR',
    PASS: '✅ PASS',
    FAIL: '❌ FAIL',
};

/**
 * Classify a result into exactly one bucket.
 *
 * An error takes precedence over the judge verdict; any verdict other than
 * 'PASS' counts as a failure so that the buckets always add up to the total.
 */
function classifyResult(result: EvaluationResult): ResultStatus {
    if (result.error) {
        return 'ERROR';
    }
    return result.judgeResult.verdict === 'PASS' ? 'PASS' : 'FAIL';
}

/**
 * Shorten `text` to at most `maxLength` UTF-16 code units, appending '...'
 * when something was cut off.
 *
 * If the cut would land in the middle of a surrogate pair, the dangling high
 * surrogate is dropped too, so the output never contains half a character.
 */
function truncate(text: string, maxLength: number): string {
    if (text.length <= maxLength) {
        return text;
    }
    let end = maxLength;
    const lastUnit = text.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return `${text.slice(0, end)}...`;
}

/**
 * Format results as a table
 *
 * Emits a header, one entry per result (status, id, category, query preview
 * and either the error or turns/duration/reason), then summary counts and an
 * overall verdict. The overall verdict is PASS only when every test passed.
 *
 * @param results - Evaluation results to render, in display order
 * @returns The rendered report as a single newline-joined string
 */
export function formatResultsTable(results: EvaluationResult[]): string {
    const lines: string[] = [];
    const counts: Record<ResultStatus, number> = { PASS: 0, FAIL: 0, ERROR: 0 };

    // Header
    lines.push('='.repeat(100));
    lines.push('Workflow Evaluation Results');
    lines.push('='.repeat(100));
    lines.push('');

    // Individual results
    for (const result of results) {
        // Classify once so the label and the summary counts can never disagree.
        const status = classifyResult(result);
        counts[status] += 1;

        lines.push(`${STATUS_LABELS[status]} | ${result.testCase.id} | ${result.testCase.category}`);
        lines.push(` Query: ${truncate(result.testCase.query, 80)}`);

        if (status === 'ERROR') {
            lines.push(` Error: ${result.error}`);
        } else {
            lines.push(` Turns: ${result.conversation.totalTurns} | Duration: ${result.durationMs}ms`);
            lines.push(` Reason: ${result.judgeResult.reason}`);
        }

        lines.push('');
    }

    lines.push('-'.repeat(100));
    lines.push('');

    // Summary stats at the END
    const totalTests = results.length;
    const passedTests = counts.PASS;
    const failedTests = counts.FAIL;
    const errorTests = counts.ERROR;

    lines.push(`📊 Summary:`);
    lines.push(` Total tests: ${totalTests}`);
    lines.push(` Passed: ${passedTests} ✅`);
    lines.push(` Failed: ${failedTests} ❌`);
    lines.push(` Errors: ${errorTests} 🔥`);
    lines.push('');

    // Final verdict - ALL tests must pass
    if (totalTests === 0) {
        lines.push('⚠️ No tests run');
    } else if (passedTests === totalTests) {
        lines.push(`✅ Overall: PASS (${passedTests}/${totalTests} tests passed)`);
    } else {
        lines.push(`❌ Overall: FAIL (${passedTests}/${totalTests} tests passed, ${failedTests} failed, ${errorTests} errors)`);
    }

    lines.push('='.repeat(100));

    return lines.join('\n');
}

/**
 * Format a single result for verbose output
 *
 * Prints the full query and requirements. For an errored result only the
 * error follows; otherwise each turn's tool calls and a preview of its final
 * response are listed, followed by the judge verdict and the duration.
 *
 * @param result - The evaluation result to render
 * @returns The rendered report as a single newline-joined string
 */
export function formatDetailedResult(result: EvaluationResult): string {
    const lines: string[] = [];

    lines.push('='.repeat(100));
    lines.push(`Test Case: ${result.testCase.id} (${result.testCase.category})`);
    lines.push('='.repeat(100));
    lines.push('');

    lines.push(`📝 Query:`);
    lines.push(` ${result.testCase.query}`);
    lines.push('');

    lines.push(`📋 Requirements:`);
    lines.push(` ${result.testCase.reference}`);
    lines.push('');

    // Errored runs stop here: conversation and judge output are not read.
    if (result.error) {
        lines.push(`🔥 Error: ${result.error}`);
        lines.push('');
        return lines.join('\n');
    }

    lines.push(`💬 Conversation (${result.conversation.totalTurns} turns):`);
    for (const turn of result.conversation.turns) {
        lines.push(` Turn ${turn.turnNumber}:`);

        for (const tc of turn.toolCalls) {
            lines.push(` 🔧 ${tc.name}(${JSON.stringify(tc.arguments)})`);
        }

        if (turn.finalResponse) {
            lines.push(` 💬 ${truncate(turn.finalResponse, 150)}`);
        }
    }
    lines.push('');

    lines.push(`⚖️ Judge Verdict: ${result.judgeResult.verdict}`);
    lines.push(` Reason: ${result.judgeResult.reason}`);
    lines.push('');

    lines.push(`⏱️ Duration: ${result.durationMs}ms`);
    lines.push('');

    return lines.join('\n');
}

/**
 * Single test result record stored in results database
 */
export type TestResultRecord = {
    /** ISO timestamp when test was run */
    timestamp: string;
    /** Agent LLM model used */
    agentModel: string;
    /** Judge LLM model used */
    judgeModel: string;
    /** Test case ID */
    testId: string;
    /** Test verdict (PASS or FAIL) */
    verdict: 'PASS' | 'FAIL';
    /** Judge reasoning or error message */
    reason: string;
    /** Test duration in milliseconds */
    durationMs: number;
    /** Number of conversation turns */
    turns: number;
    /** Error message if execution failed, null otherwise */
    error: string | null;
}

/**
 * Results database structure
 * Keys are in format: "{agentModel}:{judgeModel}:{testId}"
 */
export type ResultsDatabase = {
    version: string;
    results: Record<string, TestResultRecord>;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/apify/actors-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

output-formatter.ts•4.97 KiB