Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
evaluate_rag_haystack.ipynb (33.5 kB)
{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "4t3LXM0aNbl2" }, "source": [ "<center>\n", " <p style=\"text-align:center\">\n", " <img alt=\"phoenix logo\" src=\"https://storage.googleapis.com/arize-phoenix-assets/assets/phoenix-logo-light.svg\" width=\"200\"/>\n", " <br>\n", " <a href=\"https://arize.com/docs/phoenix/\">Docs</a>\n", " |\n", " <a href=\"https://github.com/Arize-ai/phoenix\">GitHub</a>\n", " |\n", " <a href=\"https://arize-ai.slack.com/join/shared_invite/zt-2w57bhem8-hq24MB6u7yE_ZF_ilOYSBw#/shared-invite/email\">Community</a>\n", " </p>\n", "</center>\n", "<h1 align=\"center\">Tracing and Evaluating a Haystack Application with Phoenix</h1>\n", "\n", "ℹ️ This notebook requires an OpenAI API key.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fGcvMui6fZIA", "outputId": "95cf1638-aee8-4844-defe-fa501fbe76d2" }, "outputs": [], "source": [ "%pip install -q openinference-instrumentation-haystack haystack-ai \"arize-phoenix>=4.29.0\" 'httpx<0.28'" ] }, { "cell_type": "markdown", "metadata": { "id": "mqitn1QzOU5v" }, "source": [ "# Set API Keys" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rDg3U7kbOUX_", "outputId": "551483a6-52d8-4a9b-b1ff-8ab63864c710" }, "outputs": [], "source": [ "import getpass\n", "import os\n", "\n", "if not (openai_api_key := os.getenv(\"OPENAI_API_KEY\")):\n", " openai_api_key = getpass.getpass(\"🔑 Enter your OpenAI API key: \")\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = openai_api_key" ] }, { "cell_type": "markdown", "metadata": { "id": "qlWmN0pvpJCG" }, "source": [ "# Launch Phoenix and Enable Haystack Tracing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3OhQrnlsfgdN" }, "outputs": [], "source": [ "# Check if PHOENIX_API_KEY is present in the environment variables.\n", "# If it is, we'll use the cloud instance of Phoenix. 
If it's not, we'll start a local instance.\n", "# A third option is to connect to a docker or locally hosted instance.\n", "# See https://arize.com/docs/phoenix/setup/environments for more information.\n", "\n", "import os\n", "\n", "if \"PHOENIX_API_KEY\" in os.environ:\n", " os.environ[\"OTEL_EXPORTER_OTLP_HEADERS\"] = f\"api_key={os.environ['PHOENIX_API_KEY']}\"\n", " os.environ[\"PHOENIX_CLIENT_HEADERS\"] = f\"api_key={os.environ['PHOENIX_API_KEY']}\"\n", " os.environ[\"PHOENIX_COLLECTOR_ENDPOINT\"] = \"https://app.phoenix.arize.com\"\n", "\n", "else:\n", " import phoenix as px\n", "\n", " px.launch_app().view()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from openinference.instrumentation.haystack import HaystackInstrumentor\n", "\n", "from phoenix.otel import register\n", "\n", "tracer_provider = register()\n", "\n", "# Use Phoenix's autoinstrumentor to automatically track traces from Haystack\n", "HaystackInstrumentor().instrument(tracer_provider=tracer_provider, skip_dep_check=True)" ] }, { "cell_type": "markdown", "metadata": { "id": "twBLgY1LpMPW" }, "source": [ "# Set up your Haystack app" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fQYTT4P-fjQa", "outputId": "41320394-b14e-4dac-e48c-3508c90516eb" }, "outputs": [], "source": [ "from haystack import Document, Pipeline\n", "from haystack.components.builders.prompt_builder import PromptBuilder\n", "from haystack.components.generators import OpenAIGenerator\n", "from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n", "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", "\n", "# Write documents to InMemoryDocumentStore\n", "document_store = InMemoryDocumentStore()\n", "document_store.write_documents(\n", " [\n", " Document(content=\"My name is Jean and I live in Paris.\"),\n", " Document(content=\"My name is Mark and I live in 
Berlin.\"),\n", " Document(content=\"My name is Giorgio and I live in Rome.\"),\n", " ]\n", ")\n", "\n", "# Build a RAG pipeline\n", "prompt_template = \"\"\"\n", "Given these documents, answer the question.\n", "Documents:\n", "{% for doc in documents %}\n", " {{ doc.content }}\n", "{% endfor %}\n", "Question: {{question}}\n", "Answer:\n", "\"\"\"\n", "\n", "retriever = InMemoryBM25Retriever(document_store=document_store)\n", "prompt_builder = PromptBuilder(template=prompt_template)\n", "llm = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n", "\n", "rag_pipeline = Pipeline()\n", "rag_pipeline.add_component(\"retriever\", retriever)\n", "rag_pipeline.add_component(\"prompt_builder\", prompt_builder)\n", "rag_pipeline.add_component(\"llm\", llm)\n", "rag_pipeline.connect(\"retriever\", \"prompt_builder.documents\")\n", "rag_pipeline.connect(\"prompt_builder\", \"llm\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ji06yJ2Bfmx9", "outputId": "aa417de4-8ce9-41b3-c322-e5ed216dadcc" }, "outputs": [], "source": [ "# Ask a question\n", "question = \"Who lives in Paris?\"\n", "results = rag_pipeline.run(\n", " {\n", " \"retriever\": {\"query\": question},\n", " \"prompt_builder\": {\"question\": question},\n", " }\n", ")\n", "\n", "print(results[\"llm\"][\"replies\"])" ] }, { "cell_type": "markdown", "metadata": { "id": "KDsd4qJIXfZv" }, "source": [ "# Evaluating Retrieved Docs" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_0XDNCW3YFCz" }, "outputs": [], "source": [ "import nest_asyncio\n", "\n", "import phoenix as px\n", "\n", "nest_asyncio.apply()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 318 }, "id": "FT9NbFomYYoX", "outputId": "c51c4ef2-c738-44e1-e5ed-9f5d1cd6d090" }, "outputs": [], "source": [ "from phoenix.session.evaluation import get_retrieved_documents\n", "\n", "client = 
px.Client()\n", "\n", "retrieved_documents_df = get_retrieved_documents(px.Client())\n", "retrieved_documents_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "03c4fba631ee4c248a1df176bac0c4d1", "56de8838c18b4b7bbac4b2b7698dc966", "ef24004aeccf4a4aa17f415a9f3e8376", "8761cad8dfb2438bbe2f6a210ea30c44", "dacebfc1bbeb42418e0af201e66d659c", "ac230f37777740519cf5fdb17d60873b", "f4b5da887a1e4b68bb14bda987a80bc4", "82e3baf2439044a0a5b9f215b1ef585f", "ba1467f305b74f0eb970572ff2772426", "471fc55eb46b4d0fb6f96048a0f130b6", "6d48080a5a9f416fb3fe442e9bde518e" ] }, "id": "RsAJdoFWYZzk", "outputId": "6a6987fe-6a82-4c9c-da69-15261cffbdd2" }, "outputs": [], "source": [ "from phoenix.evals import OpenAIModel, RelevanceEvaluator, run_evals\n", "\n", "relevance_evaluator = RelevanceEvaluator(OpenAIModel(model=\"gpt-4o-mini\"))\n", "\n", "retrieved_documents_relevance_df = run_evals(\n", " evaluators=[relevance_evaluator],\n", " dataframe=retrieved_documents_df,\n", " provide_explanation=True,\n", " concurrency=20,\n", ")[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "p0ANPW35Xhkx", "outputId": "ffc2a959-c7e1-4fb2-d3aa-18220fb3aaa7" }, "outputs": [], "source": [ "retrieved_documents_relevance_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TSwP858Wb7Qj", "outputId": "1acd1b35-3e91-4809-fc59-da919c5350d8" }, "outputs": [], "source": [ "from phoenix.trace import DocumentEvaluations\n", "\n", "px.Client().log_evaluations(\n", " DocumentEvaluations(dataframe=retrieved_documents_relevance_df, eval_name=\"relevance\"),\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "O5JBstOntoJx" }, "source": [ "# Evaluate Response" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{ "colab": { "base_uri": "https://localhost:8080/", "height": 168 }, "id": "WX0iCQJWtwQQ", "outputId": "a5dfd75b-31b4-430e-f002-4e257abbe995" }, "outputs": [], "source": [ "from phoenix.session.evaluation import get_qa_with_reference\n", "\n", "qa_with_reference_df = get_qa_with_reference(px.Client())\n", "qa_with_reference_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "dc7bd8404c13468e99fcc37e8377ac57", "d6308c3845ba42bd80b0e1903f54f5ff", "294d2ad16e3647c5b1497eccb5fcf155", "28a7c2f8b2c14bcd8a409f5b5bca7235", "2fcda3f03a554fceadf85637c34a725a", "cf9dd33671024014b9536cca596d343e", "5ed868930d1540b988589a439b1c1b9a", "81fce336b5f24b148dd557af20f93bc6", "100cb58ebb3d4ce4a886277a24dd2afc", "81ad93aae4e94c27af8161f25ba63f98", "d2809f1d40024b37a6fdd62dd6acb44d" ] }, "id": "O1RozU7ptywT", "outputId": "1872f2d8-3a1c-4976-8c1d-bff49d7b6994" }, "outputs": [], "source": [ "from phoenix.evals import (\n", " HallucinationEvaluator,\n", " OpenAIModel,\n", " QAEvaluator,\n", " run_evals,\n", ")\n", "\n", "qa_evaluator = QAEvaluator(OpenAIModel(model=\"gpt-4-turbo-preview\"))\n", "hallucination_evaluator = HallucinationEvaluator(OpenAIModel(model=\"gpt-4-turbo-preview\"))\n", "\n", "qa_correctness_eval_df, hallucination_eval_df = run_evals(\n", " evaluators=[qa_evaluator, hallucination_evaluator],\n", " dataframe=qa_with_reference_df,\n", " provide_explanation=True,\n", " concurrency=20,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Gjgqr_Gxt0Ke", "outputId": "3f8dc15a-a069-46d9-e717-61b8f06b8835" }, "outputs": [], "source": [ "from phoenix.client import AsyncClient\n", "\n", "px_client = AsyncClient()\n", "await px_client.spans.log_span_annotations_dataframe(\n", " dataframe=qa_correctness_eval_df,\n", " annotation_name=\"Q&A Correctness\",\n", " annotator_kind=\"LLM\",\n", ")\n", 
"await px_client.spans.log_span_annotations_dataframe(\n", " dataframe=hallucination_eval_df,\n", " annotation_name=\"Hallucination\",\n", " annotator_kind=\"LLM\",\n", ")" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "03c4fba631ee4c248a1df176bac0c4d1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_56de8838c18b4b7bbac4b2b7698dc966", "IPY_MODEL_ef24004aeccf4a4aa17f415a9f3e8376", "IPY_MODEL_8761cad8dfb2438bbe2f6a210ea30c44" ], "layout": "IPY_MODEL_dacebfc1bbeb42418e0af201e66d659c" } }, "471fc55eb46b4d0fb6f96048a0f130b6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": 
null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "56de8838c18b4b7bbac4b2b7698dc966": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ac230f37777740519cf5fdb17d60873b", "placeholder": "​", "style": "IPY_MODEL_f4b5da887a1e4b68bb14bda987a80bc4", "value": "run_evals " } }, "6d48080a5a9f416fb3fe442e9bde518e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "82e3baf2439044a0a5b9f215b1ef585f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8761cad8dfb2438bbe2f6a210ea30c44": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_471fc55eb46b4d0fb6f96048a0f130b6", "placeholder": "​", "style": "IPY_MODEL_6d48080a5a9f416fb3fe442e9bde518e", "value": " 3/3 (100.0%) | ⏳ 00:01&lt;00:00 |  2.49it/s" } }, "ac230f37777740519cf5fdb17d60873b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, 
"justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ba1467f305b74f0eb970572ff2772426": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "dacebfc1bbeb42418e0af201e66d659c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ef24004aeccf4a4aa17f415a9f3e8376": { 
"model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_82e3baf2439044a0a5b9f215b1ef585f", "max": 3, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ba1467f305b74f0eb970572ff2772426", "value": 3 } }, "f4b5da887a1e4b68bb14bda987a80bc4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.