@arizeai/phoenix-mcp

Official

Overview Inspect Schema Related Servers Score Discussions

createEvaluator.ts•5.7 kB

import { withSpan } from "@arizeai/openinference-core";

import { EvaluatorBase } from "../core/EvaluatorBase";
import { FunctionEvaluator } from "../core/FunctionEvaluator";
import {
  EvaluationKind,
  OptimizationDirection,
  TelemetryConfig,
} from "../types";

import { asEvaluatorFn } from "./asEvaluatorFn";

// eslint-disable-next-line @typescript-eslint/no-explicit-any
type AnyFn = (...args: any[]) => any;

/**
 * Builds a fallback evaluator name of the form `evaluator-<suffix>`.
 *
 * The suffix is taken from the base-36 rendering of `Math.random()`, so
 * uniqueness is probabilistic rather than guaranteed.
 */
function generateUniqueName(): string {
  return `evaluator-${Math.random().toString(36).substring(2, 15)}`;
}

/**
 * Options for creating a custom evaluator using {@link createEvaluator}.
 *
 * @public
 */
export type CreateEvaluatorOptions = {
  /**
   * The name of the evaluator / metric that it measures.
   *
   * If not provided, the function will attempt to infer the name from the function's `name` property.
   * If the function has no name, a unique name will be generated.
   *
   * @example
   * ```typescript
   * const evaluator = createEvaluator(myFunction, { name: "custom-metric" });
   * ```
   */
  name?: string;
  /**
   * The kind of the evaluation.
   *
   * - `"CODE"`: Code-based evaluator that runs custom logic
   * - `"LLM"`: LLM-based evaluator that uses a language model
   *
   * @defaultValue `"CODE"`
   *
   * @example
   * ```typescript
   * const evaluator = createEvaluator(myFunction, { kind: "CODE" });
   * ```
   */
  kind?: EvaluationKind;
  /**
   * The direction to optimize the numeric evaluation score.
   *
   * - `"MAXIMIZE"`: Higher scores are better (e.g., accuracy, F1 score)
   * - `"MINIMIZE"`: Lower scores are better (e.g., error rate, latency)
   *
   * @defaultValue `"MAXIMIZE"`
   *
   * @example
   * ```typescript
   * const evaluator = createEvaluator(myFunction, {
   *   optimizationDirection: "MAXIMIZE"
   * });
   * ```
   */
  optimizationDirection?: OptimizationDirection;
  /**
   * The telemetry configuration for the evaluator.
   *
   * When enabled, the evaluator will automatically create OpenTelemetry spans
   * for tracing and observability. This allows you to track evaluator performance
   * and debug issues in distributed systems.
   *
   * @defaultValue `{ isEnabled: true }`
   *
   * @example
   * ```typescript
   * const evaluator = createEvaluator(myFunction, {
   *   telemetry: { isEnabled: true, tracer: myTracer }
   * });
   * ```
   */
  telemetry?: TelemetryConfig;
};

/**
 * A factory function for creating a custom evaluator from any function.
 *
 * This function wraps a user-provided function into an evaluator that can be used
 * with Phoenix experiments and evaluations. The function can be synchronous or
 * asynchronous, and can return a number, an {@link EvaluationResult} object, or
 * a value that will be automatically converted to an evaluation result.
 *
 * The evaluator will automatically:
 * - Convert the function's return value to an {@link EvaluationResult}
 * - Handle both sync and async functions
 * - Wrap the function with OpenTelemetry spans if telemetry is enabled
 * - Infer the evaluator name from the function name if not provided
 *
 * @typeParam RecordType - The type of the input record that the evaluator expects.
 *   Must extend `Record<string, unknown>`.
 * @typeParam Fn - The type of the function being wrapped. Must be a function that
 *   accepts the record type and returns a value compatible with {@link EvaluationResult}.
 *
 * @param fn - The function to wrap as an evaluator. Can be synchronous or asynchronous.
 *   The function should accept a record of type `RecordType` and return either:
 *   - A number (will be converted to `{ score: number }`)
 *   - An {@link EvaluationResult} object
 *   - Any value that can be converted to an evaluation result
 *
 * @param options - Optional configuration for the evaluator. See {@link CreateEvaluatorOptions}
 *   for details on available options.
 *
 * @returns An {@link EvaluatorBase} (concretely a {@link FunctionEvaluator}) that can be
 *   used with Phoenix experiments and evaluation workflows.
 *
 * @example
 * Basic usage with a simple scoring function:
 * ```typescript
 * const accuracyEvaluator = createEvaluator(
 *   ({ output, expected }) => {
 *     return output === expected ? 1 : 0;
 *   },
 *   {
 *     name: "accuracy",
 *     kind: "CODE",
 *     optimizationDirection: "MAXIMIZE"
 *   }
 * );
 *
 * const result = await accuracyEvaluator.evaluate({
 *   output: "correct answer",
 *   expected: "correct answer"
 * });
 * // result: { score: 1 }
 * ```
 *
 * @example
 * Returning a full EvaluationResult:
 * ```typescript
 * const qualityEvaluator = createEvaluator(
 *   ({ output }) => {
 *     const score = calculateQuality(output);
 *     return {
 *       score,
 *       label: score > 0.8 ? "high" : "low",
 *       explanation: `Quality score: ${score}`
 *     };
 *   },
 *   { name: "quality" }
 * );
 * ```
 */
export function createEvaluator<
  RecordType extends Record<string, unknown> = Record<string, unknown>,
  Fn extends AnyFn = AnyFn,
>(fn: Fn, options?: CreateEvaluatorOptions): EvaluatorBase<RecordType> {
  const {
    name,
    kind,
    optimizationDirection,
    telemetry = { isEnabled: true },
  } = options || {};

  // `||` (not `??`) is deliberate: anonymous functions report `fn.name === ""`,
  // and an empty string must fall through to the generated name.
  const evaluatorName = name || fn.name || generateUniqueName();

  let evaluateFn = asEvaluatorFn<RecordType>(fn);

  // Add OpenTelemetry span wrapping if telemetry is enabled
  if (telemetry && telemetry.isEnabled) {
    evaluateFn = withSpan(evaluateFn, {
      tracer: telemetry.tracer,
      name: evaluatorName,
      kind: "EVALUATOR",
    });
  }

  return new FunctionEvaluator<RecordType>({
    evaluateFn,
    name: evaluatorName,
    kind: kind || "CODE",
    optimizationDirection: optimizationDirection || "MAXIMIZE",
    telemetry,
  });
}

Latest Blog Posts

What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat
MCP Moves to the Linux Foundation: Neutral Stewardship for Agentic Infrastructure
By Om-Shree-0709 on December 15, 2025.
mcp
anthropic
Linux Foundation
Code Execution with MCP: Architecting Agentic Efficiency
By Om-Shree-0709 on December 14, 2025.
mcp
Token bloat

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.