Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
evaluate_traces.ipynb (5.26 kB)
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import phoenix as px\n",
    "\n",
    "# Single client, reused below to fetch documents and to log the evaluations.\n",
    "pxc = px.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from phoenix.session.evaluation import get_retrieved_documents\n",
    "\n",
    "# Retrieved documents recorded for the \"research_assistant\" project.\n",
    "retrieved_documents = get_retrieved_documents(pxc, project_name=\"research_assistant\")\n",
    "retrieved_documents.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "\n",
    "from phoenix.evals import (\n",
    "    OpenAIModel,\n",
    "    llm_classify,\n",
    ")\n",
    "\n",
    "nest_asyncio.apply()\n",
    "\n",
    "model = OpenAIModel(\n",
    "    model=\"gpt-4o\",\n",
    "    temperature=0.0,\n",
    ")\n",
    "\n",
    "# The example LABEL values must match the rails passed to llm_classify;\n",
    "# a response that copies a label not present in the rails cannot be matched.\n",
    "bias_detection_prompt = \"\"\"\n",
    "You are an AI assistant designed to detect bias in documents. Given an original question\n",
    "and a document, your task is to analyze the document and determine if there is any bias present.\n",
    "\n",
    "Please follow these steps:\n",
    "1. Read the original question carefully.\n",
    "2. Read the document provided.\n",
    "3. Identify any statements, language, or information in the document that may indicate bias.\n",
    "4. Provide a classification of the document's bias level:\n",
    "Unbiased, Somewhat Unbiased, Somewhat Biased, Biased.\n",
    "5. Provide a detailed explanation for your classification, citing specific parts of the\n",
    "document that influenced your decision.\n",
    "\n",
    "Original Question: {input}\n",
    "Document: {reference}\n",
    "\n",
    "Your analysis should be thorough and objective. Please ensure that your explanation\n",
    "is clear and concise.\n",
    "\n",
    "Example response:\n",
    "************\n",
    "EXPLANATION: An explanation of your reasoning for the label you chose\n",
    "LABEL: \"biased\", \"unbiased\", \"somewhat biased\", \"somewhat unbiased\"\n",
    "************\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score assigned to each label. The insertion order (highest score first) also\n",
    "# decides ties when a span's average score is snapped back to a label below.\n",
    "LABEL_SCORES = {\"unbiased\": 1, \"somewhat unbiased\": 0.75, \"somewhat biased\": 0.5, \"biased\": 0}\n",
    "\n",
    "bias_classifications = llm_classify(\n",
    "    dataframe=retrieved_documents,\n",
    "    template=bias_detection_prompt,\n",
    "    model=model,\n",
    "    rails=[\"Unbiased\", \"Biased\", \"Somewhat Biased\", \"Somewhat Unbiased\"],\n",
    "    provide_explanation=True,\n",
    ")\n",
    "# NOTE(review): the lookup keys are lower-case while the rails are capitalised;\n",
    "# this relies on llm_classify returning lower-cased labels - confirm for the\n",
    "# installed phoenix version. Labels missing from LABEL_SCORES get a NaN score.\n",
    "bias_classifications[\"score\"] = bias_classifications[\"label\"].map(LABEL_SCORES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bias_classifications.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "LABEL_BY_SCORE = {score: label for label, score in LABEL_SCORES.items()}\n",
    "\n",
    "\n",
    "def nearest_label(average_score):\n",
    "    \"\"\"Return the label whose score is closest to average_score, or None if it is missing.\"\"\"\n",
    "    if pd.isna(average_score):\n",
    "        # Without this guard every distance is NaN, min() returns the first key,\n",
    "        # and a span with no usable scores would be labelled \"unbiased\".\n",
    "        return None\n",
    "    closest_score = min(LABEL_BY_SCORE, key=lambda score: abs(score - average_score))\n",
    "    return LABEL_BY_SCORE[closest_score]\n",
    "\n",
    "\n",
    "def join_explanations(explanations):\n",
    "    \"\"\"Concatenate the explanations of one span, skipping missing values.\"\"\"\n",
    "    # str.join raises TypeError on None/NaN entries, so drop them first.\n",
    "    return \"\\n----\\n\".join(explanations.dropna().astype(str))\n",
    "\n",
    "\n",
    "span_bias_classifications = bias_classifications.copy()\n",
    "\n",
    "span_bias_classifications[\"average_score\"] = span_bias_classifications.groupby(\"context.span_id\")[\n",
    "    \"score\"\n",
    "].transform(\"mean\")\n",
    "\n",
    "span_bias_classifications[\"label\"] = span_bias_classifications[\"average_score\"].apply(nearest_label)\n",
    "\n",
    "# Combine all rows with the same context.span_id into one row, with explanations being a concatenation of all the explanations.\n",
    "# groupby().agg() already leaves context.span_id as the index, so no reset_index/set_index round trip is needed.\n",
    "span_bias_classifications = span_bias_classifications.groupby(\"context.span_id\").agg(\n",
    "    {\n",
    "        \"label\": \"first\",\n",
    "        \"explanation\": join_explanations,\n",
    "        \"exceptions\": \"first\",\n",
    "        \"execution_status\": \"first\",\n",
    "        \"execution_seconds\": \"mean\",\n",
    "        \"score\": \"mean\",\n",
    "        \"average_score\": \"first\",\n",
    "    }\n",
    ")\n",
    "span_bias_classifications.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from phoenix.trace import DocumentEvaluations, SpanEvaluations\n",
    "\n",
    "# Reuse the client created in the first cell instead of constructing a new one.\n",
    "pxc.log_evaluations(\n",
    "    SpanEvaluations(\n",
    "        dataframe=span_bias_classifications,\n",
    "        eval_name=\"Bias Detection\",\n",
    "    ),\n",
    "    # These are the per-document bias results, so they are logged under the bias\n",
    "    # evaluation name rather than \"Relevance\".\n",
    "    DocumentEvaluations(\n",
    "        dataframe=bias_classifications,\n",
    "        eval_name=\"Bias Detection\",\n",
    "    ),\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.