// experiment_evaluation_example.ts
import { createHallucinationEvaluator } from "../src/llm";
import { openai } from "@ai-sdk/openai";
import { createOrGetDataset } from "@arizeai/phoenix-client/datasets";
import {
asEvaluator,
runExperiment,
} from "@arizeai/phoenix-client/experiments";
import type { ExperimentTask } from "@arizeai/phoenix-client/types/experiments";
import { generateText } from "ai";
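
// This example runs a Phoenix experiment: the task answers each dataset
// question from its provided context, and an LLM evaluator then scores each
// answer for hallucination against that same context.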
const model = openai("gpt-4o-mini");

const main = async () => {
  // Create your evaluator
  const hallucinationEvaluator = createHallucinationEvaluator({
    model,
  });
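
  // The same gpt-4o-mini instance both answers the questions (in the task
  // below) and judges them here; swap in a stronger judge model if needed.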

  // Create a dataset for your experiment
  const dataset = await createOrGetDataset({
    name: "hallucination-eval",
    description: "Evaluate the hallucination of the model",
    examples: [
      {
        input: {
          question: "Is ArizeAI Phoenix Open-Source?",
          context: "Phoenix is Open-Source.",
        },
      },
      // ... more examples
    ],
  });
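
  // createOrGetDataset is idempotent by name: re-running the script reuses
  // the existing "hallucination-eval" dataset rather than creating a duplicate.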

  // Define your experiment task: answer each question using only the
  // example's context
  const task: ExperimentTask = async (example) => {
    if (typeof example.input.question !== "string") {
      throw new Error("Invalid input: question must be a string");
    }
    if (typeof example.input.context !== "string") {
      throw new Error("Invalid input: context must be a string");
    }
    // Your AI system's response to the question
    const response = await generateText({
      model,
      // Emit OpenTelemetry spans for this call so the experiment run is traced
      experimental_telemetry: {
        isEnabled: true,
      },
      system: `You answer questions based on this context: ${example.input.context}`,
      prompt: example.input.question,
    });
    if (!response.text) {
      throw new Error("Invalid response: text is required");
    }
    return response.text;
  };
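
  // The string returned by the task becomes the `output` argument passed to
  // each evaluator below.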

  // Wrap the hallucination evaluator so the experiment can validate results
  const hallucinationCheck = asEvaluator({
    name: "hallucination",
    kind: "LLM",
    evaluate: async ({ input, output }) => {
      if (typeof input.question !== "string") {
        throw new Error("Invalid input: question must be a string");
      }
      if (typeof input.context !== "string") {
        throw new Error("Invalid input: context must be a string");
      }
      if (typeof output !== "string") {
        throw new Error("Invalid output: must be a string");
      }
      // Use the hallucination evaluator from phoenix-evals
      const result = await hallucinationEvaluator.evaluate({
        input: input.question,
        context: input.context, // Note: uses 'context', not 'reference'
        output,
      });
      return result; // Return the evaluation result
    },
  });
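
  // The returned evaluation result is recorded in Phoenix alongside each
  // experiment run.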

  // Run the experiment with automatic tracing
  await runExperiment({
    experimentName: "hallucination-eval",
    experimentDescription: "Evaluate the hallucination of the model",
    dataset,
    task,
    evaluators: [hallucinationCheck],
    // Run each example 3 times to smooth over nondeterministic outputs
    repetitions: 3,
  });
};

main().catch((error) => {
  console.error(error);
  process.exit(1);
});
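
// Usage note: run with a TypeScript runner (e.g. `npx tsx
// experiment_evaluation_example.ts`); OPENAI_API_KEY must be set for the
// OpenAI calls, and a Phoenix instance must be reachable for the dataset and
// experiment calls.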