#!/usr/bin/env python
"""
Script to create an LLM evaluator via GraphQL mutation.
"""
import asyncio

from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport

MUTATION = gql("""
mutation CreateDatasetLLMEvaluator($input: CreateDatasetLLMEvaluatorInput!) {
createDatasetLlmEvaluator(input: $input) {
evaluator {
id
displayName
evaluator {
... on LLMEvaluator {
id
name
description
}
}
}
query {
__typename
}
}
}
""")
DATASET_ID = "RGF0YXNldDozNA=="


async def main() -> None:
    transport = AIOHTTPTransport(url="http://localhost:6006/graphql")
    async with Client(transport=transport, fetch_schema_from_transport=False) as session:
        variables = {
            "input": {
                "datasetId": DATASET_ID,
                "name": "relevance-evaluator",
                "description": "Evaluates whether the output is relevant to the input",
"promptVersion": {
"templateFormat": "MUSTACHE",
"template": {
"messages": [
{
"role": "SYSTEM",
"content": [
{
"text": {
"text": (
"You are an expert evaluator. "
"Your task is to assess whether the assistant's "
"response is relevant to the user's question. "
"Use the provided tool to submit your evaluation."
)
}
}
],
},
{
"role": "USER",
"content": [
{
"text": {
"text": """Given the following:
User Question: {{input}}
Assistant Response: {{output}}
Is the assistant's response relevant to the user's question?"""
}
}
],
},
]
},
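                    # Forcing tool_choice makes the model answer with a structured
                    # tool call to relevance-evaluator instead of free-form text.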
"invocationParameters": {
"tool_choice": {
"type": "function",
"function": {"name": "relevance-evaluator"},
}
},
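                    # The tool's JSON schema restricts the verdict to one of three
                    # labels, e.g. the model returns {"relevance": "relevant"}.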
"tools": [
{
"definition": {
"type": "function",
"function": {
"name": "relevance-evaluator",
"description": (
"Evaluates whether the output is relevant to the input"
),
"parameters": {
"type": "object",
"properties": {
"relevance": {
"type": "string",
"enum": [
"relevant",
"partially_relevant",
"not_relevant",
],
}
},
"required": ["relevance"],
},
},
}
}
],
"modelProvider": "OPENAI",
"modelName": "gpt-4o-mini",
},
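                # Maps each tool-call label to a numeric score; MAXIMIZE indicates
                # that higher scores are better.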
"outputConfig": {
"name": "relevance",
"description": "Measures how relevant the response is to the input",
"optimizationDirection": "MAXIMIZE",
"values": [
{"label": "relevant", "score": 1.0},
{"label": "partially_relevant", "score": 0.5},
{"label": "not_relevant", "score": 0.0},
],
},
}
}
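
        # The result mirrors the mutation's selection set, roughly:
        # {"createDatasetLlmEvaluator": {"evaluator": {"id": ..., "displayName": ...,
        #   "evaluator": {"id": ..., "name": ..., "description": ...}}, "query": {...}}}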
print("Creating LLM evaluator...")
result = await session.execute(MUTATION, variable_values=variables)
dataset_evaluator = result["createDatasetLlmEvaluator"]["evaluator"]
llm_evaluator = dataset_evaluator["evaluator"]
print("✅ Created evaluator!")
print(f" DatasetEvaluator ID: {dataset_evaluator['id']}")
print(f" Display Name: {dataset_evaluator['displayName']}")
print(f" LLMEvaluator ID: {llm_evaluator['id']}")
print(f" LLMEvaluator Name: {llm_evaluator['name']}")
print(f" LLMEvaluator Description: {llm_evaluator['description']}")
if __name__ == "__main__":
asyncio.run(main())
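
# Example run (script name illustrative; assumes a compatible GraphQL server
# is listening on localhost:6006):
#   $ python create_llm_evaluator.py
#   Creating LLM evaluator...
#   ✅ Created evaluator!
#    DatasetEvaluator ID: ...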