Skip to main content
Glama
Arize-ai

@arizeai/phoenix-mcp

Official
by Arize-ai
createEvaluator.test.ts (16.5 kB)
import { createEvaluator } from "../../src/helpers/createEvaluator";
import type { EvaluationResult } from "../../src/types";

import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
import {
  InMemorySpanExporter,
  NodeTracerProvider,
  SimpleSpanProcessor,
} from "@opentelemetry/sdk-trace-node";
import { afterEach, beforeEach, describe, expect, it } from "vitest";

/** Record shape passed to the evaluators in most of the tests below. */
type TestRecord = {
  input: string;
  output: string;
  expected?: string;
  [key: string]: unknown;
};

/**
 * Tests for `createEvaluator`: result normalization, name inference, default
 * and custom options, telemetry spans, error propagation and input-mapping
 * binding.
 */
describe("CreateEvaluator", () => {
  let spanExporter: InMemorySpanExporter;
  let tracerProvider: NodeTracerProvider;

  beforeEach(() => {
    // Set up in-memory span exporter and tracer provider
    spanExporter = new InMemorySpanExporter();
    tracerProvider = new NodeTracerProvider({
      spanProcessors: [new SimpleSpanProcessor(spanExporter)],
    });
    tracerProvider.register();
  });

  afterEach(async () => {
    // Clean up after each test. `shutdown()` returns a promise; await it so
    // teardown finishes (and any failure surfaces) before the next test runs.
    spanExporter.reset();
    await tracerProvider.shutdown();
  });

  describe("basic functionality", () => {
    it("should create an evaluator from a sync function returning a number", async () => {
      const fn = ({ output, expected }: TestRecord) => {
        return output === expected ? 1 : 0;
      };

      const evaluator = createEvaluator(fn, {
        name: "accuracy",
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "correct",
        expected: "correct",
      });

      expect(result).toEqual({ score: 1 });
      expect(evaluator.name).toBe("accuracy");
    });

    it("should create an evaluator from an async function returning a number", async () => {
      const fn = async ({ output, expected }: TestRecord) => {
        await new Promise((resolve) => setTimeout(resolve, 10));
        return output === expected ? 1 : 0;
      };

      const evaluator = createEvaluator(fn, {
        name: "async-accuracy",
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "correct",
        expected: "correct",
      });

      expect(result).toEqual({ score: 1 });
    });

    it("should create an evaluator from a function returning an EvaluationResult", async () => {
      const fn = (_record: TestRecord): EvaluationResult => {
        return {
          score: 0.95,
          label: "high",
          explanation: "High quality output",
        };
      };

      const evaluator = createEvaluator(fn, {
        name: "quality",
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "some output",
      });

      expect(result).toEqual({
        score: 0.95,
        label: "high",
        explanation: "High quality output",
      });
    });

    it("should create an evaluator from a function returning a string (label)", async () => {
      const fn = ({ output }: TestRecord) => {
        return output.length > 10 ? "long" : "short";
      };

      const evaluator = createEvaluator(fn, {
        name: "length-checker",
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "very long output text",
      });

      expect(result).toEqual({ label: "long" });
    });

    it("should return an EvaluatorInterface", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      expect(evaluator).toHaveProperty("evaluate");
      expect(evaluator).toHaveProperty("name");
      expect(evaluator).toHaveProperty("kind");
      expect(typeof evaluator.evaluate).toBe("function");
    });
  });

  describe("name inference", () => {
    it("should use the provided name when given", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "custom-name" });

      expect(evaluator.name).toBe("custom-name");
    });

    it("should infer name from function name when no name provided", () => {
      function accuracyChecker() {
        return 1;
      }

      const evaluator = createEvaluator(accuracyChecker);

      expect(evaluator.name).toBe("accuracyChecker");
    });

    it("should generate a unique name when function has no name", () => {
      // Arrow functions assigned to a const pick up the variable's name, so
      // override the name property with an empty string to simulate a
      // function that has no name.
      const fn = () => 1;
      Object.defineProperty(fn, "name", { value: "", configurable: true });

      const evaluator = createEvaluator(fn);

      expect(evaluator.name).toMatch(/^evaluator-[a-z0-9]+$/);
    });

    it("should prioritize provided name over function name", () => {
      function myFunction() {
        return 1;
      }

      const evaluator = createEvaluator(myFunction, {
        name: "overridden-name",
      });

      expect(evaluator.name).toBe("overridden-name");
    });
  });

  describe("default values", () => {
    it("should default kind to CODE", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      expect(evaluator.kind).toBe("CODE");
    });

    it("should default optimizationDirection to MAXIMIZE", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      expect(evaluator.optimizationDirection).toBe("MAXIMIZE");
    });

    it("should default telemetry to enabled", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      expect(evaluator.telemetry).toEqual({ isEnabled: true });
    });
  });

  describe("custom options", () => {
    it("should set kind to LLM when provided", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, {
        name: "test",
        kind: "LLM",
      });

      expect(evaluator.kind).toBe("LLM");
    });

    it("should set optimizationDirection to MINIMIZE when provided", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, {
        name: "test",
        optimizationDirection: "MINIMIZE",
      });

      expect(evaluator.optimizationDirection).toBe("MINIMIZE");
    });

    it("should accept custom telemetry configuration", () => {
      const tracer = tracerProvider.getTracer("test");
      const fn = () => 1;
      const evaluator = createEvaluator(fn, {
        name: "test",
        telemetry: { isEnabled: true, tracer },
      });

      expect(evaluator.telemetry).toEqual({
        isEnabled: true,
        tracer,
      });
    });

    it("should accept all options together", () => {
      const tracer = tracerProvider.getTracer("test");
      const fn = () => 1;
      const evaluator = createEvaluator(fn, {
        name: "comprehensive-test",
        kind: "LLM",
        optimizationDirection: "MINIMIZE",
        telemetry: { isEnabled: false, tracer },
      });

      expect(evaluator.name).toBe("comprehensive-test");
      expect(evaluator.kind).toBe("LLM");
      expect(evaluator.optimizationDirection).toBe("MINIMIZE");
      expect(evaluator.telemetry).toEqual({
        isEnabled: false,
        tracer,
      });
    });
  });

  describe("telemetry", () => {
    it("should enable telemetry by default", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      expect(evaluator.telemetry?.isEnabled).toBe(true);
    });

    it("should disable telemetry when isEnabled is false", async () => {
      const fn = ({ output }: TestRecord) => {
        return output.length;
      };

      const evaluator = createEvaluator(fn, {
        name: "test",
        telemetry: { isEnabled: false },
      });

      expect(evaluator.telemetry?.isEnabled).toBe(false);

      const result = await evaluator.evaluate({
        input: "test",
        output: "hello",
      });

      expect(result).toEqual({ score: 5 });

      // Verify no spans were created when telemetry is disabled
      const spans = spanExporter.getFinishedSpans();
      expect(spans).toHaveLength(0);
    });

    it("should create spans when telemetry is enabled", async () => {
      const tracer = tracerProvider.getTracer("test");
      const fn = ({ output }: TestRecord) => {
        return output.length;
      };

      const evaluator = createEvaluator(fn, {
        name: "test-evaluator",
        telemetry: { isEnabled: true, tracer },
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "hello",
      });

      expect(result).toEqual({ score: 5 });

      // Verify spans were created
      const spans = spanExporter.getFinishedSpans();
      expect(spans).toHaveLength(1);

      const span = spans[0];
      expect(span.name).toBe("test-evaluator");
      expect(span.kind).toBe(SpanKind.INTERNAL);
      expect(span.status.code).toBe(SpanStatusCode.OK);
    });

    it("should create spans with correct attributes for async functions", async () => {
      const tracer = tracerProvider.getTracer("test");
      const fn = async ({ output }: TestRecord) => {
        await new Promise((resolve) => setTimeout(resolve, 10));
        return output.length;
      };

      const evaluator = createEvaluator(fn, {
        name: "async-evaluator",
        telemetry: { isEnabled: true, tracer },
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "hello world",
      });

      expect(result).toEqual({ score: 11 });

      const spans = spanExporter.getFinishedSpans();
      expect(spans).toHaveLength(1);

      const span = spans[0];
      expect(span.name).toBe("async-evaluator");
      expect(span.status.code).toBe(SpanStatusCode.OK);
    });

    it("should record errors in spans when function throws", async () => {
      const tracer = tracerProvider.getTracer("test");
      const fn = () => {
        throw new Error("Test error");
      };

      const evaluator = createEvaluator(fn, {
        name: "error-evaluator",
        telemetry: { isEnabled: true, tracer },
      });

      await expect(
        evaluator.evaluate({
          input: "test",
          output: "test",
        })
      ).rejects.toThrow("Test error");

      const spans = spanExporter.getFinishedSpans();
      expect(spans).toHaveLength(1);

      const span = spans[0];
      expect(span.name).toBe("error-evaluator");
      expect(span.status.code).toBe(SpanStatusCode.ERROR);
      expect(span.status.message).toBe("Test error");
      expect(span.events).toHaveLength(1);
      expect(span.events[0].name).toBe("exception");
    });

    it("should use global tracer when no tracer provided", async () => {
      const fn = ({ output }: TestRecord) => {
        return output.length;
      };

      const evaluator = createEvaluator(fn, {
        name: "global-tracer-test",
        telemetry: { isEnabled: true },
      });

      const result = await evaluator.evaluate({
        input: "test",
        output: "hello",
      });

      expect(result).toEqual({ score: 5 });

      // Spans may or may not reach this exporter depending on global tracer
      // setup.
      // NOTE(review): `>= 0` holds for every array, so this assertion cannot
      // fail; it only proves that evaluation succeeded without an explicit
      // tracer. Tighten it once the global-tracer behavior is confirmed.
      const spans = spanExporter.getFinishedSpans();
      expect(spans.length).toBeGreaterThanOrEqual(0);
    });
  });

  describe("evaluator behavior", () => {
    it("should evaluate records correctly", async () => {
      const fn = ({ output, expected }: TestRecord) => {
        return output === expected ? 1 : 0;
      };

      const evaluator = createEvaluator(fn, { name: "test" });

      const result1 = await evaluator.evaluate({
        input: "test",
        output: "match",
        expected: "match",
      });
      expect(result1).toEqual({ score: 1 });

      const result2 = await evaluator.evaluate({
        input: "test",
        output: "no match",
        expected: "match",
      });
      expect(result2).toEqual({ score: 0 });
    });

    it("should handle errors thrown by the function", async () => {
      const fn = () => {
        throw new Error("Test error");
      };

      const evaluator = createEvaluator(fn, { name: "test" });

      await expect(
        evaluator.evaluate({
          input: "test",
          output: "test",
        })
      ).rejects.toThrow("Test error");
    });

    it("should handle async errors", async () => {
      const fn = async () => {
        throw new Error("Async error");
      };

      const evaluator = createEvaluator(fn, { name: "test" });

      await expect(
        evaluator.evaluate({
          input: "test",
          output: "test",
        })
      ).rejects.toThrow("Async error");
    });
  });

  describe("type safety", () => {
    it("should work with custom record types", async () => {
      type CustomRecord = {
        question: string;
        answer: string;
      };

      const fn = ({ question, answer }: CustomRecord) => {
        return question.length + answer.length;
      };

      const evaluator = createEvaluator<CustomRecord>(fn, { name: "test" });

      const result = await evaluator.evaluate({
        question: "What is AI?",
        answer: "Artificial Intelligence",
      });

      // "What is AI?" = 11 chars, "Artificial Intelligence" = 23 chars,
      // total = 34
      expect(result).toEqual({ score: 34 });
    });

    it("should preserve type information", () => {
      type CustomRecord = {
        value: number;
      };

      const fn = ({ value }: CustomRecord) => value * 2;
      const evaluator = createEvaluator<CustomRecord>(fn, { name: "test" });

      // TypeScript should enforce the correct type
      expect(evaluator).toBeDefined();
    });
  });

  describe("edge cases", () => {
    it("should handle functions returning 0", async () => {
      const fn = () => 0;
      const evaluator = createEvaluator(fn, { name: "test" });

      const result = await evaluator.evaluate({
        input: "test",
        output: "test",
      });

      expect(result).toEqual({ score: 0 });
    });

    it("should handle functions returning negative numbers", async () => {
      const fn = () => -1;
      const evaluator = createEvaluator(fn, { name: "test" });

      const result = await evaluator.evaluate({
        input: "test",
        output: "test",
      });

      expect(result).toEqual({ score: -1 });
    });

    it("should handle functions returning null", async () => {
      const fn = () => null;
      const evaluator = createEvaluator(fn, { name: "test" });

      const result = await evaluator.evaluate({
        input: "test",
        output: "test",
      });

      expect(result).toEqual({});
    });

    it("should handle functions returning undefined", async () => {
      const fn = () => undefined;
      const evaluator = createEvaluator(fn, { name: "test" });

      const result = await evaluator.evaluate({
        input: "test",
        output: "test",
      });

      expect(result).toEqual({});
    });

    it("should handle functions with no parameters", async () => {
      const fn = () => 42;
      const evaluator = createEvaluator(fn, { name: "test" });

      const result = await evaluator.evaluate({
        input: "test",
        output: "test",
      });

      expect(result).toEqual({ score: 42 });
    });
  });

  describe("integration", () => {
    it("should create a FunctionEvaluator instance", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      // FunctionEvaluator should have evaluateFn property
      expect(evaluator).toHaveProperty("evaluateFn");
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      expect(typeof (evaluator as any).evaluateFn).toBe("function");
    });

    it("should work with bindInputMapping", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, { name: "test" });

      const boundEvaluator = evaluator.bindInputMapping({
        mappedInput: "input",
      });

      expect(boundEvaluator.inputMapping).toEqual({
        mappedInput: "input",
      });
      expect(boundEvaluator.name).toBe("test");
    });

    it("should maintain evaluator properties after binding", () => {
      const fn = () => 1;
      const evaluator = createEvaluator(fn, {
        name: "test",
        kind: "LLM",
        optimizationDirection: "MINIMIZE",
      });

      const boundEvaluator = evaluator.bindInputMapping({
        mapped: "value",
      });

      expect(boundEvaluator.name).toBe("test");
      expect(boundEvaluator.kind).toBe("LLM");
      expect(boundEvaluator.optimizationDirection).toBe("MINIMIZE");
    });
  });
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.