Fonoster MCP Server

Official

Overview Schema Related Servers Score Discussions

evalScenarios.integration.test.ts•7.92 KiB

/**
 * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
 * http://github.com/fonoster/fonoster
 *
 * This file is part of Fonoster
 *
 * Licensed under the MIT License (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    https://opensource.org/licenses/MIT
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Integration test for autopilot evaluation scenarios.
 *
 * Loads etc/autopilot.yaml, injects API keys from config/integrations.json,
 * runs evalTestCases (no API server needed), and asserts that every scenario
 * passes -- including tool-call expectations (e.g. hangup).
 *
 * Skips gracefully when the YAML or credentials file is missing.
 */
import * as fs from "fs";
import * as path from "path";
import { expect } from "chai";
import * as yaml from "js-yaml";
import { assistantSchema } from "@fonoster/common";
import { ExpectedTextType, ScenarioEvaluationReport } from "@fonoster/types";
import { evalTestCases } from "../../src/models/evaluations/evalTestCases";

// ---------------------------------------------------------------------------
// Paths
// ---------------------------------------------------------------------------

// Four levels up from this test file's directory.
const REPO_ROOT = path.join(__dirname, "..", "..", "..", "..");
const AUTOPILOT_YAML = path.join(REPO_ROOT, "etc", "autopilot.yaml");
const INTEGRATIONS_JSON = path.join(REPO_ROOT, "config", "integrations.json");

// ---------------------------------------------------------------------------
// Types (lightweight, test-only)
// ---------------------------------------------------------------------------

/** One entry of config/integrations.json, reduced to the fields read here. */
type Integration = {
  productRef: string;
  credentials?: { apiKey?: string };
};

/** Shape of the parsed autopilot YAML, down to the scenarios list. */
type RawAutopilotConfig = {
  intelligence?: {
    config: Record<string, unknown> & {
      testCases?: { scenarios?: unknown[] };
    };
  };
};

/** Language model settings; unknown extra keys are carried through as-is. */
type LanguageModelConfig = {
  provider?: string;
  apiKey?: string;
  temperature?: number;
  [key: string]: unknown;
};

/** The `testCases` section: scenarios plus the optional evals model. */
type TestCasesConfig = {
  scenarios: {
    conversation: {
      expected?: { text?: { type?: string } };
    }[];
  }[];
  evalsLanguageModel?: {
    provider?: string;
    model?: string;
    apiKey?: string;
  };
};

/** The subset of the assistant config that this test reads and mutates. */
type AssistantRawConfig = {
  languageModel?: LanguageModelConfig;
  testCases?: TestCasesConfig;
};

// ---------------------------------------------------------------------------
// Provider-to-productRef mapping
// ---------------------------------------------------------------------------

const PROVIDER_PRODUCT_REF: Record<string, string> = {
  openai: "llm.openai",
  anthropic: "llm.anthropic",
  groq: "llm.groq",
  google: "llm.google"
};

// ---------------------------------------------------------------------------
// Helper functions
// ---------------------------------------------------------------------------

/**
 * Read and parse a JSON file, or return null if missing.
 *
 * The parsed value is only cast to `T`, not validated; callers must check the
 * shape themselves before relying on it.
 */
function loadJsonFile<T>(filePath: string): T | null {
  if (!fs.existsSync(filePath)) return null;
  return JSON.parse(fs.readFileSync(filePath, "utf8")) as T;
}

/** Read and parse the autopilot YAML, or return null if missing. */
function loadAutopilotYaml(): RawAutopilotConfig | null {
  if (!fs.existsSync(AUTOPILOT_YAML)) return null;
  return yaml.load(
    fs.readFileSync(AUTOPILOT_YAML, "utf8")
  ) as RawAutopilotConfig;
}

/**
 * Look up an API key in the integrations array by productRef.
 *
 * @returns the key of the first matching integration, or undefined when no
 *          integration matches or the match carries no `credentials.apiKey`.
 */
function findApiKey(
  integrations: Integration[],
  productRef: string
): string | undefined {
  return integrations.find((i) => i.productRef === productRef)?.credentials
    ?.apiKey;
}

/**
 * Resolve a provider name (e.g. "openai") to its productRef (e.g. "llm.openai").
 * Providers missing from PROVIDER_PRODUCT_REF resolve to "llm.google".
 */
function resolveProductRef(provider: string): string {
  return PROVIDER_PRODUCT_REF[provider] ?? "llm.google";
}

/**
 * Inject API keys from integrations.json into the parsed config.
 * Also forces temperature to 0 for maximum eval determinism.
 *
 * The config is mutated in place. The main language model defaults to the
 * "google" provider when none is set. The evals language model (when present)
 * receives the key that matches its own `provider`; a missing or unmapped
 * provider keeps the "llm.openai" key.
 */
function injectCredentials(
  config: AssistantRawConfig,
  integrations: Integration[]
): void {
  const provider = config.languageModel?.provider ?? "google";

  config.languageModel = {
    ...config.languageModel,
    apiKey: findApiKey(integrations, resolveProductRef(provider)),
    temperature: 0
  };

  const evalsModel = config.testCases?.evalsLanguageModel;
  if (config.testCases && evalsModel) {
    // Honor the evals model's own provider instead of assuming OpenAI, so a
    // non-OpenAI evals model is not handed the wrong (or an undefined) key.
    const evalsProductRef =
      PROVIDER_PRODUCT_REF[evalsModel.provider ?? "openai"] ?? "llm.openai";

    config.testCases.evalsLanguageModel = {
      ...evalsModel,
      apiKey: findApiKey(integrations, evalsProductRef)
    };
  }
}

/**
 * Normalize the expected text type strings from the YAML (e.g. "SIMILAR")
 * into the enum values that the evaluation engine expects.
 *
 * Matching is case-insensitive. Any value other than "similar" becomes
 * "EXACT". Steps without an expected text type are left untouched. The
 * scenarios are mutated in place.
 */
function normalizeExpectedTextTypes(
  scenarios: TestCasesConfig["scenarios"]
): void {
  for (const scenario of scenarios) {
    for (const step of scenario.conversation) {
      if (step.expected?.text?.type) {
        const raw = String(step.expected.text.type).toLowerCase();
        // NOTE(review): the fallback is the string literal "EXACT", presumably
        // equal to ExpectedTextType.EXACT -- confirm against @fonoster/types.
        (step.expected.text as { type: string }).type =
          raw === "similar" ? ExpectedTextType.SIMILAR : "EXACT";
      }
    }
  }
}

/**
 * Format a scenario report into a readable assertion message.
 *
 * Only each step's pass flag, error message, and tool evaluations are
 * included in the serialized details.
 */
function formatFailureMessage(report: ScenarioEvaluationReport): string {
  const details = report.steps?.map((s) => ({
    passed: s.passed,
    errorMessage: s.errorMessage,
    toolEvaluations: s.toolEvaluations
  }));
  return `Scenario "${report.scenarioRef}" failed. \nSteps: ${JSON.stringify(details, null, 2)}`;
}

// ---------------------------------------------------------------------------
// Test suite
// ---------------------------------------------------------------------------

describe("Autopilot Evaluation Scenarios", function () {
  // Applies to the hook and tests below; the evaluation run happens in `before`.
  this.timeout(240_000);

  let reports: ScenarioEvaluationReport[];

  before(async function () {
    const raw = loadAutopilotYaml();
    if (!raw?.intelligence?.config?.testCases?.scenarios?.length) {
      console.log("[eval] skipping: autopilot.yaml missing or has no scenarios");
      this.skip();
      return;
    }

    const integrations = loadJsonFile<Integration[]>(INTEGRATIONS_JSON);
    if (!integrations) {
      console.log("[eval] skipping: config/integrations.json not found");
      this.skip();
      return;
    }

    // The JSON is unvalidated; fail with a clear message rather than an opaque
    // "integrations.find is not a function" further down.
    if (!Array.isArray(integrations)) {
      throw new Error(
        "config/integrations.json must contain a JSON array of integrations"
      );
    }

    const config = raw.intelligence.config as AssistantRawConfig;
    injectCredentials(config, integrations);
    // Safe: the guard above established that testCases.scenarios is non-empty.
    normalizeExpectedTextTypes(config.testCases!.scenarios);

    const parsed = assistantSchema.parse(config);

    console.log(
      "[eval] running %d scenario(s) with provider=%s model=%s",
      parsed.testCases?.scenarios?.length ?? 0,
      config.languageModel?.provider,
      (config.languageModel as Record<string, unknown>)?.model
    );

    reports = await evalTestCases({ intelligence: { config: parsed } });
  });

  it("should run all scenarios successfully", function () {
    expect(reports, "evaluation reports should exist").to.be.an("array").that
      .is.not.empty;
  });

  it("should pass every scenario", function () {
    for (const report of reports) {
      expect(report.overallPassed, formatFailureMessage(report)).to.be.true;
    }
  });

  it("should pass conversation-termination with a hangup tool call", function () {
    const termination = reports.find(
      (r) => r.scenarioRef === "conversation-termination"
    );
    expect(termination, "conversation-termination scenario should exist").to
      .exist;
    expect(termination!.overallPassed, formatFailureMessage(termination!)).to
      .be.true;

    // Verify the hangup tool was actually invoked
    const lastStep = termination!.steps?.[termination!.steps.length - 1];
    expect(
      lastStep?.toolEvaluations,
      "last step should have tool evaluations"
    ).to.be.an("array").that.is.not.empty;
    expect(
      lastStep?.toolEvaluations?.[0]?.passed,
      "hangup tool evaluation should pass"
    ).to.be.true;
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/fonoster/fonoster'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

evalScenarios.integration.test.ts•7.92 KiB