
actors-mcp-server

Official
by apify
eval-single.ts (2.73 kB)
#!/usr/bin/env tsx
import dotenv from 'dotenv';

import log from '@apify/log';

import {
    loadTools,
    createOpenRouterTask,
    createToolSelectionLLMEvaluator,
    loadTestCases,
    filterById,
    type TestCase,
} from './evaluation-utils.js';
import { PASS_THRESHOLD, sanitizeHeaderValue } from './config.js';

dotenv.config({ path: '.env' });

log.setLevel(log.LEVELS.INFO);

// const MODEL_NAME = 'openai/gpt-4.1-mini';
const MODEL_NAME = 'anthropic/claude-haiku-4.5';
const RUN_LLM_JUDGE = true;

// Hardcoded examples for quick testing
const EXAMPLES: TestCase[] = [];
EXAMPLES.push(...filterById(loadTestCases('test-cases.json').testCases, 'weather-mcp-search-then-call-1'));

async function main() {
    process.env.OPENROUTER_API_KEY = sanitizeHeaderValue(process.env.OPENROUTER_API_KEY);

    console.log(`\nEvaluating ${EXAMPLES.length} examples\n`);

    // 1. Load tools
    const tools = await loadTools();
    console.log(`Loaded ${tools.length} tools\n`);

    // Loop through each example
    for (let i = 0; i < EXAMPLES.length; i++) {
        const example = EXAMPLES[i];
        console.log(`\n=== Example ${i + 1}/${EXAMPLES.length}: ${example.id} ===`);
        console.log('Query:', example.query);
        console.log('Expected tools:', example.expectedTools);

        // 2. Call LLM with tools
        console.log('\nRunning LLM tool calling');
        const task = createOpenRouterTask(MODEL_NAME, tools);
        const output = await task({ input: example as unknown as Record<string, unknown> });

        console.log('\nLLM response');
        console.log('Tool calls:', JSON.stringify(output.tool_calls, null, 2));
        console.log('Message:', output.llm_response || '(no message)');

        if (!RUN_LLM_JUDGE) {
            console.log('Skipping LLM evaluation as RUN_LLM_JUDGE is set to false');
            console.log('='.repeat(50));
        } else {
            // 3. Evaluate with LLM judge
            console.log('\nEvaluating with LLM');
            const llmEvaluator = createToolSelectionLLMEvaluator(tools);
            const result = await llmEvaluator.evaluate({
                input: example as unknown as Record<string, unknown>,
                output,
                expected: example as unknown as Record<string, unknown>,
            });

            const passed = result.score ? (result.score > PASS_THRESHOLD) : false;

            console.log('\nEvaluation result');
            console.log('Score:', result.score);
            console.log('Explanation:', result.explanation);
            console.log('Passed:', result.score ? (passed ? 'True ✅' : 'False ❌') : 'False ❌');
            console.log('='.repeat(50));
        }
    }
}

main().catch(console.error);
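
The script reads only the id, query, and expectedTools fields of each test case before handing the whole object to the OpenRouter task and the LLM judge, and loadTestCases('test-cases.json').testCases implies the file wraps its entries in a top-level testCases array. A minimal sketch of the shape this implies is below; the real TestCase type exported by evaluation-utils.ts may carry additional fields, and every value except the id is a placeholder.

// Hypothetical minimal shape of one test case, inferred only from the fields
// that eval-single.ts reads (id, query, expectedTools). The actual TestCase
// type in evaluation-utils.ts may define more.
type MinimalTestCase = {
    id: string;
    query: string;
    expectedTools: string[];
};

// Placeholder entry; only the id appears in the script above.
const sample: MinimalTestCase = {
    id: 'weather-mcp-search-then-call-1',
    query: '<natural-language request the model should satisfy with tool calls>',
    expectedTools: ['<name of the tool the model is expected to select>'],
};

Presumably, adding such an entry to test-cases.json and changing the id passed to filterById is all that is needed to evaluate a different case with this script.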

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/apify/actors-mcp-server'
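
The same lookup can be done from TypeScript; a rough sketch is below. The URL is taken from the curl command above, and since the response schema is not documented on this page, the result is left as unknown.

// Sketch: query the Glama MCP directory API for the actors-mcp-server entry.
// Endpoint copied from the curl example; the response shape is not shown here,
// so it is typed as unknown.
const response = await fetch('https://glama.ai/api/mcp/v1/servers/apify/actors-mcp-server');
if (!response.ok) {
    throw new Error(`Directory API request failed with status ${response.status}`);
}
const serverInfo: unknown = await response.json();
console.log(JSON.stringify(serverInfo, null, 2));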

If you have feedback or need assistance with the MCP directory API, please join our Discord server.