de en es ja ko ru zh

actors-mcp-server

Official

by apify

Overview Schema Related Servers Score Discussions

TypeScript

Hybrid

actors-mcp-server
evals

eval-single.ts•2.67 KiB

#!/usr/bin/env tsx import dotenv from 'dotenv'; import log from '@apify/log'; import { loadTools, createOpenRouterTask, createToolSelectionLLMEvaluator, loadTestCases, filterById, type TestCase } from './evaluation-utils.js'; import { PASS_THRESHOLD, sanitizeHeaderValue } from './config.js'; dotenv.config({ path: '.env' }); log.setLevel(log.LEVELS.INFO); // const MODEL_NAME = 'openai/gpt-4.1-mini'; const MODEL_NAME = 'anthropic/claude-haiku-4.5' const RUN_LLM_JUDGE = true; // Hardcoded examples for quick testing const EXAMPLES: TestCase[] = [ ]; EXAMPLES.push(...filterById(loadTestCases('test-cases.json').testCases, 'weather-mcp-search-then-call-1')); async function main() { process.env.OPENROUTER_API_KEY = sanitizeHeaderValue(process.env.OPENROUTER_API_KEY); console.log(`\nEvaluating ${EXAMPLES.length} examples\n`); // 1. Load tools const tools = await loadTools(); console.log(`Loaded ${tools.length} tools\n`); // Loop through each example for (let i = 0; i < EXAMPLES.length; i++) { const example = EXAMPLES[i]; console.log(`\n=== Example ${i + 1}/${EXAMPLES.length}: ${example.id} ===`); console.log('Query:', example.query); console.log('Expected tools:', example.expectedTools); // 2. Call LLM with tools console.log('\nRunning LLM tool calling'); const task = createOpenRouterTask(MODEL_NAME, tools); const output = await task({ input: example as unknown as Record<string, unknown> }); console.log('\nLLM response'); console.log('Tool calls:', JSON.stringify(output.tool_calls, null, 2)); console.log('Message:', output.llm_response || '(no message)'); if (!RUN_LLM_JUDGE) { console.log('Skipping LLM evaluation as RUN_LLM_JUDGE is set to false'); console.log('='.repeat(50)); } else { // 3. Evaluate with LLM judge console.log('\nEvaluating with LLM'); const llmEvaluator = createToolSelectionLLMEvaluator(tools); const result = await llmEvaluator.evaluate({ input: example as unknown as Record<string, unknown>, output, expected: example as unknown as Record<string, unknown> }); const passed = result.score ? (result.score > PASS_THRESHOLD) : false; console.log('\nEvaluation result'); console.log('Score:', result.score ); console.log('Explanation:', result.explanation); console.log('Passed:', result.score ? (passed ? 'True ✅' : 'False ❌') : 'False ❌'); console.log('='.repeat(50)); } } } main().catch(console.error);

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/apify/actors-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

eval-single.ts•2.67 KiB