Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
run_evals.py (1.64 kB)
# type: ignore
"""Query Phoenix for spans from the last minute and log evaluations back.

Computes hallucination, QA-correctness, and document-relevance evaluations
for recent spans and writes the results back to Phoenix. Intended to run
once a minute as a cron job.
"""

from datetime import datetime, timedelta

import phoenix as px
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations


def main() -> None:
    """Run one evaluation pass over spans captured since the last cron tick."""
    phoenix_client = px.Client()

    # Look back one minute plus a 10-second buffer so spans recorded right at
    # the cron boundary are not missed between consecutive runs.
    # NOTE(review): datetime.now() is naive local time — confirm Phoenix
    # expects local rather than UTC timestamps for start_time.
    last_eval_run_time = datetime.now() - timedelta(minutes=1, seconds=10)

    # Fetch the two span views the evaluators operate on.
    qa_spans_df = get_qa_with_reference(phoenix_client, start_time=last_eval_run_time)
    retriever_spans_df = get_retrieved_documents(
        phoenix_client, start_time=last_eval_run_time
    )

    eval_model = OpenAIModel(
        model_name="gpt-4-turbo-preview",
    )
    hallucination_evaluator = HallucinationEvaluator(eval_model)
    qa_correctness_evaluator = QAEvaluator(eval_model)
    relevance_evaluator = RelevanceEvaluator(eval_model)

    # run_evals returns one dataframe per evaluator, in the order supplied.
    hallucination_evals_df, qa_correctness_evals_df = run_evals(
        qa_spans_df,
        [hallucination_evaluator, qa_correctness_evaluator],
    )
    relevance_evals_df = run_evals(
        retriever_spans_df,
        [relevance_evaluator],
    )[0]

    # Span-level evals attach to QA spans; document-level evals attach to
    # the individual retrieved documents.
    phoenix_client.log_evaluations(
        SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_evals_df),
        SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_evals_df),
        DocumentEvaluations(eval_name="Relevance", dataframe=relevance_evals_df),
    )
    print("Evaluations logged to Phoenix")


if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.