Skip to main content
Glama
Arize-ai

@arizeai/phoenix-mcp

Official
by Arize-ai
createDocumentRelevanceEvaluator.test.ts (9 kB)
import { createDocumentRelevanceEvaluator } from "../../src/llm/createDocumentRelevanceEvaluator"; import * as generateClassificationModule from "../../src/llm/generateClassification"; import { openai } from "@ai-sdk/openai"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; describe("createDocumentRelevanceEvaluator", () => { beforeEach(() => { // Mock the OpenAI API key environment variable vi.stubEnv("OPENAI_API_KEY", "sk-dummy-test-key-12345"); }); afterEach(() => { // Clean up mocks vi.unstubAllEnvs(); vi.restoreAllMocks(); }); const model = openai("gpt-4o-mini"); const customDocumentRelevanceTemplate = ` Custom template for document relevance detection: Question: {{input}} Document text: {{documentText}} Is the document text relevant to the question? Respond with "relevant" or "unrelated". `; it("should create a document relevance evaluator with default template and choices", async () => { // Mock the generateClassification function const mockGenerateClassification = vi .spyOn(generateClassificationModule, "generateClassification") .mockResolvedValue({ label: "relevant", explanation: "The document text is relevant to the question", }); const evaluator = createDocumentRelevanceEvaluator({ model, }); const result = await evaluator.evaluate({ input: "What is Arize Phoenix?", documentText: "Arize Phoenix is a platform for building and deploying AI applications.", }); // Verify the function was called with default template and choices expect(mockGenerateClassification).toHaveBeenCalledWith( expect.objectContaining({ labels: ["relevant", "unrelated"], prompt: expect.arrayContaining([ expect.objectContaining({ role: "user", content: expect.stringContaining( "You are comparing a document to a question" ), }), ]), }) ); expect(result.label).toBe("relevant"); expect(result.score).toBe(1); // relevant = 1 in default choices expect(result.explanation).toBe( "The document text is relevant to the question" ); }); it("should use default optimization 
direction from config", () => { const evaluator = createDocumentRelevanceEvaluator({ model }); expect(evaluator.optimizationDirection).toBe("MAXIMIZE"); }); it("should allow overriding optimization direction", () => { const evaluator = createDocumentRelevanceEvaluator({ model, optimizationDirection: "MINIMIZE", }); expect(evaluator.optimizationDirection).toBe("MINIMIZE"); }); it("should support custom template", async () => { // Mock the generateClassification function const mockGenerateClassification = vi .spyOn(generateClassificationModule, "generateClassification") .mockResolvedValue({ label: "no", explanation: "The document text is not relevant to the question", }); const evaluator = createDocumentRelevanceEvaluator({ model, promptTemplate: customDocumentRelevanceTemplate, choices: { yes: 0, no: 1 }, // Custom choices for custom template }); const result = await evaluator.evaluate({ input: "How much does Arize Phoenix cost?", documentText: "Arize Phoenix is a platform for building and deploying AI applications.", }); // Verify the function was called with custom template expect(mockGenerateClassification).toHaveBeenCalledWith( expect.objectContaining({ labels: ["yes", "no"], prompt: expect.stringContaining( "Custom template for document relevance detection" ), }) ); expect(result.label).toBe("no"); expect(result.score).toBe(1); // no = 1 in custom choices }); it("should support custom choices with default template", async () => { // Mock the generateClassification function vi.spyOn( generateClassificationModule, "generateClassification" ).mockResolvedValue({ label: "relevant", explanation: "The document text is relevant to the question", }); const customChoices = { relevant: 0.8, unrelated: 0.2 }; const evaluator = createDocumentRelevanceEvaluator({ model, choices: customChoices, }); const result = await evaluator.evaluate({ input: "What is Arize Phoenix?", documentText: "Arize Phoenix is a platform for building and deploying AI applications.", }); 
expect(result.label).toBe("relevant"); expect(result.score).toBe(0.8); // Custom score for relevant }); it("should have telemetry enabled by default", async () => { // Mock the generateClassification function to spy on telemetry configuration const mockGenerateClassification = vi .spyOn(generateClassificationModule, "generateClassification") .mockResolvedValue({ label: "relevant", explanation: "This is a test explanation", }); const evaluator = createDocumentRelevanceEvaluator({ model, // Note: we're not explicitly setting telemetry options here }); await evaluator.evaluate({ input: "What is Arize Phoenix?", documentText: "Arize Phoenix is a platform for building and deploying AI applications.", }); // Verify that generateClassification was called without telemetry property (defaults to enabled) expect(mockGenerateClassification).toHaveBeenCalledWith( expect.not.objectContaining({ telemetry: expect.anything(), }) ); }); it("should respect explicitly disabled telemetry", async () => { // Mock the generateClassification function to spy on telemetry configuration const mockGenerateClassification = vi .spyOn(generateClassificationModule, "generateClassification") .mockResolvedValue({ label: "relevant", explanation: "This is a test explanation", }); const evaluator = createDocumentRelevanceEvaluator({ model, telemetry: { isEnabled: false }, // Explicitly disable telemetry }); await evaluator.evaluate({ input: "What is Arize Phoenix?", documentText: "Arize Phoenix is a platform for building and deploying AI applications.", }); // Verify that generateClassification was called with telemetry disabled expect(mockGenerateClassification).toHaveBeenCalledWith( expect.objectContaining({ telemetry: { isEnabled: false }, }) ); }); it("should support custom tracer in telemetry configuration", async () => { // Mock the generateClassification function const mockGenerateClassification = vi .spyOn(generateClassificationModule, "generateClassification") .mockResolvedValue({ label: 
"relevant", explanation: "This is a test explanation", }); const customTracer = {} as import("@opentelemetry/api").Tracer; // Mock tracer object const evaluator = createDocumentRelevanceEvaluator({ model, telemetry: { isEnabled: true, tracer: customTracer, }, }); await evaluator.evaluate({ input: "What is Arize Phoenix?", documentText: "Arize Phoenix is a platform for building and deploying AI applications.", }); // Verify that generateClassification was called with custom tracer expect(mockGenerateClassification).toHaveBeenCalledWith( expect.objectContaining({ telemetry: { isEnabled: true, tracer: customTracer, }, }) ); }); it("should properly interpolate template variables", async () => { // Mock the generateClassification function const mockGenerateClassification = vi .spyOn(generateClassificationModule, "generateClassification") .mockResolvedValue({ label: "relevant", explanation: "Template variables correctly interpolated", }); const evaluator = createDocumentRelevanceEvaluator({ model, }); const testInput = "What is the capital of France?"; const testOutput = "Paris is the capital and largest city of France."; await evaluator.evaluate({ documentText: testOutput, input: testInput, }); // Verify that the prompt contains the interpolated values expect(mockGenerateClassification).toHaveBeenCalledWith( expect.objectContaining({ prompt: expect.arrayContaining([ expect.objectContaining({ role: "user", content: expect.stringContaining(testInput), }), ]), }) ); expect(mockGenerateClassification).toHaveBeenCalledWith( expect.objectContaining({ prompt: expect.arrayContaining([ expect.objectContaining({ role: "user", content: expect.stringContaining(testOutput), }), ]), }) ); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.