evaluate_rag_haystack.ipynb•32.9 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "4t3LXM0aNbl2"
},
"source": [
"<center>\n",
" <p style=\"text-align:center\">\n",
" <img alt=\"phoenix logo\" src=\"https://storage.googleapis.com/arize-phoenix-assets/assets/phoenix-logo-light.svg\" width=\"200\"/>\n",
" <br>\n",
" <a href=\"https://arize.com/docs/phoenix/\">Docs</a>\n",
" |\n",
" <a href=\"https://github.com/Arize-ai/phoenix\">GitHub</a>\n",
" |\n",
" <a href=\"https://arize-ai.slack.com/join/shared_invite/zt-2w57bhem8-hq24MB6u7yE_ZF_ilOYSBw#/shared-invite/email\">Community</a>\n",
" </p>\n",
"</center>\n",
"<h1 align=\"center\">Tracing and Evaluating a Haystack Application with Phoenix</h1>\n",
"\n",
"ℹ️ This notebook requires an OpenAI API key.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fGcvMui6fZIA",
"outputId": "95cf1638-aee8-4844-defe-fa501fbe76d2"
},
"outputs": [],
"source": [
"!pip install -q openinference-instrumentation-haystack haystack-ai \"arize-phoenix>=4.29.0\" 'httpx<0.28'"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "mqitn1QzOU5v"
},
"source": [
"# Set API Keys"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rDg3U7kbOUX_",
"outputId": "551483a6-52d8-4a9b-b1ff-8ab63864c710"
},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not (openai_api_key := os.getenv(\"OPENAI_API_KEY\")):\n",
" openai_api_key = getpass(\"🔑 Enter your OpenAI API key: \")\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = openai_api_key"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "qlWmN0pvpJCG"
},
"source": [
"# Launch Phoenix and Enable Haystack Tracing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3OhQrnlsfgdN"
},
"outputs": [],
"source": [
"# Use the cloud instance of Phoenix. If it's not, we'll start a local instance.\n",
"\n",
"\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"if not (phoenix_api_key := os.getenv(\"PHOENIX_API_KEY\")):\n",
" phoenix_api_key = getpass(\"🔑 Enter your Phoenix API key: \")\n",
"os.environ[\"PHOENIX_API_KEY\"] = phoenix_api_key\n",
"\n",
"if not (openai_api_key := os.getenv(\"OPENAI_API_KEY\")):\n",
" openai_api_key = getpass(\"🔑 Enter your OpenAI API key: \")\n",
"os.environ[\"OPENAI_API_KEY\"] = openai_api_key"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from phoenix.otel import register\n",
"\n",
"tracer_provider = register(project_name=\"rag-haystack-tutorial\", auto_instrument=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "twBLgY1LpMPW"
},
"source": [
"# Set up your Haystack app"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fQYTT4P-fjQa",
"outputId": "41320394-b14e-4dac-e48c-3508c90516eb"
},
"outputs": [],
"source": [
"from haystack import Document, Pipeline\n",
"from haystack.components.builders.prompt_builder import PromptBuilder\n",
"from haystack.components.generators import OpenAIGenerator\n",
"from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n",
"from haystack.document_stores.in_memory import InMemoryDocumentStore\n",
"\n",
"# Write documents to InMemoryDocumentStore\n",
"document_store = InMemoryDocumentStore()\n",
"document_store.write_documents(\n",
" [\n",
" Document(content=\"My name is Jean and I live in Paris.\"),\n",
" Document(content=\"My name is Mark and I live in Berlin.\"),\n",
" Document(content=\"My name is Giorgio and I live in Rome.\"),\n",
" ]\n",
")\n",
"\n",
"# Build a RAG pipeline\n",
"prompt_template = \"\"\"\n",
"Given these documents, answer the question.\n",
"Documents:\n",
"{% for doc in documents %}\n",
" {{ doc.content }}\n",
"{% endfor %}\n",
"Question: {{question}}\n",
"Answer:\n",
"\"\"\"\n",
"\n",
"retriever = InMemoryBM25Retriever(document_store=document_store)\n",
"prompt_builder = PromptBuilder(template=prompt_template)\n",
"llm = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
"\n",
"rag_pipeline = Pipeline()\n",
"rag_pipeline.add_component(\"retriever\", retriever)\n",
"rag_pipeline.add_component(\"prompt_builder\", prompt_builder)\n",
"rag_pipeline.add_component(\"llm\", llm)\n",
"rag_pipeline.connect(\"retriever\", \"prompt_builder.documents\")\n",
"rag_pipeline.connect(\"prompt_builder\", \"llm\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ji06yJ2Bfmx9",
"outputId": "aa417de4-8ce9-41b3-c322-e5ed216dadcc"
},
"outputs": [],
"source": [
"# Ask a question\n",
"question = \"Who lives in Paris?\"\n",
"results = rag_pipeline.run(\n",
" {\n",
" \"retriever\": {\"query\": question},\n",
" \"prompt_builder\": {\"question\": question},\n",
" }\n",
")\n",
"\n",
"print(results[\"llm\"][\"replies\"])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KDsd4qJIXfZv"
},
"source": [
"# Evaluating Retrieved Docs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_0XDNCW3YFCz"
},
"outputs": [],
"source": [
"import nest_asyncio\n",
"\n",
"import phoenix as px\n",
"\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 318
},
"id": "FT9NbFomYYoX",
"outputId": "c51c4ef2-c738-44e1-e5ed-9f5d1cd6d090"
},
"outputs": [],
"source": [
"from phoenix.session.evaluation import get_retrieved_documents\n",
"\n",
"client = px.Client()\n",
"\n",
"retrieved_documents_df = get_retrieved_documents(px.Client())\n",
"retrieved_documents_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"03c4fba631ee4c248a1df176bac0c4d1",
"56de8838c18b4b7bbac4b2b7698dc966",
"ef24004aeccf4a4aa17f415a9f3e8376",
"8761cad8dfb2438bbe2f6a210ea30c44",
"dacebfc1bbeb42418e0af201e66d659c",
"ac230f37777740519cf5fdb17d60873b",
"f4b5da887a1e4b68bb14bda987a80bc4",
"82e3baf2439044a0a5b9f215b1ef585f",
"ba1467f305b74f0eb970572ff2772426",
"471fc55eb46b4d0fb6f96048a0f130b6",
"6d48080a5a9f416fb3fe442e9bde518e"
]
},
"id": "RsAJdoFWYZzk",
"outputId": "6a6987fe-6a82-4c9c-da69-15261cffbdd2"
},
"outputs": [],
"source": [
"from phoenix.evals import OpenAIModel, RelevanceEvaluator, run_evals\n",
"\n",
"relevance_evaluator = RelevanceEvaluator(OpenAIModel(model=\"gpt-4o-mini\"))\n",
"\n",
"retrieved_documents_relevance_df = run_evals(\n",
" evaluators=[relevance_evaluator],\n",
" dataframe=retrieved_documents_df,\n",
" provide_explanation=True,\n",
" concurrency=20,\n",
")[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "p0ANPW35Xhkx",
"outputId": "ffc2a959-c7e1-4fb2-d3aa-18220fb3aaa7"
},
"outputs": [],
"source": [
"retrieved_documents_relevance_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSwP858Wb7Qj",
"outputId": "1acd1b35-3e91-4809-fc59-da919c5350d8"
},
"outputs": [],
"source": [
"from phoenix.trace import DocumentEvaluations\n",
"\n",
"px.Client().log_evaluations(\n",
" DocumentEvaluations(dataframe=retrieved_documents_relevance_df, eval_name=\"relevance\"),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "O5JBstOntoJx"
},
"source": [
"# Evaluate Response"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 168
},
"id": "WX0iCQJWtwQQ",
"outputId": "a5dfd75b-31b4-430e-f002-4e257abbe995"
},
"outputs": [],
"source": [
"from phoenix.session.evaluation import get_qa_with_reference\n",
"\n",
"qa_with_reference_df = get_qa_with_reference(px.Client())\n",
"qa_with_reference_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"dc7bd8404c13468e99fcc37e8377ac57",
"d6308c3845ba42bd80b0e1903f54f5ff",
"294d2ad16e3647c5b1497eccb5fcf155",
"28a7c2f8b2c14bcd8a409f5b5bca7235",
"2fcda3f03a554fceadf85637c34a725a",
"cf9dd33671024014b9536cca596d343e",
"5ed868930d1540b988589a439b1c1b9a",
"81fce336b5f24b148dd557af20f93bc6",
"100cb58ebb3d4ce4a886277a24dd2afc",
"81ad93aae4e94c27af8161f25ba63f98",
"d2809f1d40024b37a6fdd62dd6acb44d"
]
},
"id": "O1RozU7ptywT",
"outputId": "1872f2d8-3a1c-4976-8c1d-bff49d7b6994"
},
"outputs": [],
"source": [
"from phoenix.evals import (\n",
" HallucinationEvaluator,\n",
" OpenAIModel,\n",
" QAEvaluator,\n",
" run_evals,\n",
")\n",
"\n",
"qa_evaluator = QAEvaluator(OpenAIModel(model=\"gpt-4-turbo-preview\"))\n",
"hallucination_evaluator = HallucinationEvaluator(OpenAIModel(model=\"gpt-4-turbo-preview\"))\n",
"\n",
"qa_correctness_eval_df, hallucination_eval_df = run_evals(\n",
" evaluators=[qa_evaluator, hallucination_evaluator],\n",
" dataframe=qa_with_reference_df,\n",
" provide_explanation=True,\n",
" concurrency=20,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Gjgqr_Gxt0Ke",
"outputId": "3f8dc15a-a069-46d9-e717-61b8f06b8835"
},
"outputs": [],
"source": [
"from phoenix.client import AsyncClient\n",
"\n",
"px_client = AsyncClient()\n",
"await px_client.spans.log_span_annotations_dataframe(\n",
" dataframe=qa_correctness_eval_df,\n",
" annotation_name=\"Q&A Correctness\",\n",
" annotator_kind=\"LLM\",\n",
")\n",
"await px_client.spans.log_span_annotations_dataframe(\n",
" dataframe=hallucination_eval_df,\n",
" annotation_name=\"Hallucination\",\n",
" annotator_kind=\"LLM\",\n",
")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"03c4fba631ee4c248a1df176bac0c4d1": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_56de8838c18b4b7bbac4b2b7698dc966",
"IPY_MODEL_ef24004aeccf4a4aa17f415a9f3e8376",
"IPY_MODEL_8761cad8dfb2438bbe2f6a210ea30c44"
],
"layout": "IPY_MODEL_dacebfc1bbeb42418e0af201e66d659c"
}
},
"471fc55eb46b4d0fb6f96048a0f130b6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"56de8838c18b4b7bbac4b2b7698dc966": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ac230f37777740519cf5fdb17d60873b",
"placeholder": "",
"style": "IPY_MODEL_f4b5da887a1e4b68bb14bda987a80bc4",
"value": "run_evals "
}
},
"6d48080a5a9f416fb3fe442e9bde518e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"82e3baf2439044a0a5b9f215b1ef585f": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8761cad8dfb2438bbe2f6a210ea30c44": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_471fc55eb46b4d0fb6f96048a0f130b6",
"placeholder": "",
"style": "IPY_MODEL_6d48080a5a9f416fb3fe442e9bde518e",
"value": " 3/3 (100.0%) | ⏳ 00:01<00:00 | 2.49it/s"
}
},
"ac230f37777740519cf5fdb17d60873b": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ba1467f305b74f0eb970572ff2772426": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"dacebfc1bbeb42418e0af201e66d659c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ef24004aeccf4a4aa17f415a9f3e8376": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_82e3baf2439044a0a5b9f215b1ef585f",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_ba1467f305b74f0eb970572ff2772426",
"value": 3
}
},
"f4b5da887a1e4b68bb14bda987a80bc4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}