from_openai.ipynb•22.9 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "db0c6b46",
"metadata": {},
"outputs": [],
"source": [
"%pip install -Uqqq arize-phoenix-client arize-phoenix-otel openai requests openinference-instrumentation-openai"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "384710ac",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"from enum import Enum\n",
"from getpass import getpass\n",
"from itertools import chain\n",
"from textwrap import dedent\n",
"\n",
"import openai\n",
"import pandas as pd\n",
"import requests\n",
"from IPython.display import HTML, display\n",
"from openai import OpenAI\n",
"from openai.lib._parsing import type_to_response_format_param\n",
"from openai.types.chat.completion_create_params import CompletionCreateParamsBase\n",
"from openinference.instrumentation.openai import OpenAIInstrumentor\n",
"from pydantic import BaseModel, create_model\n",
"\n",
"from phoenix.client import Client\n",
"from phoenix.client.types import PromptVersion\n",
"from phoenix.otel import register"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21f76ad1",
"metadata": {},
"outputs": [],
"source": [
"# Register a tracer provider, then hand it to the OpenAI instrumentor.\n",
"tracer_provider = register()\n",
"openai_instrumentor = OpenAIInstrumentor()\n",
"openai_instrumentor.instrument(tracer_provider=tracer_provider)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92f666ad",
"metadata": {},
"outputs": [],
"source": [
"# Ask for the key interactively only when the environment has no non-empty value for it.\n",
"if os.environ.get(\"OPENAI_API_KEY\", \"\") == \"\":\n",
"    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")"
]
},
{
"cell_type": "markdown",
"id": "a2b98d5ff1986241",
"metadata": {},
"source": [
"Install and start a local instance of Phoenix if needed."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddc86060",
"metadata": {},
"outputs": [],
"source": [
"%pip install -Uqqq arize-phoenix\n",
"import phoenix as px\n",
"\n",
"px.launch_app()"
]
},
{
"cell_type": "markdown",
"id": "44bfdad5",
"metadata": {},
"source": [
"# Text Generation"
]
},
{
"cell_type": "markdown",
"id": "e943ba2d",
"metadata": {},
"source": [
"## Quick Start"
]
},
{
"cell_type": "markdown",
"id": "7a938255",
"metadata": {},
"source": [
"Here's a simple LLM invocation."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b7d8a59",
"metadata": {},
"outputs": [],
"source": [
"# Build the request as typed OpenAI parameters; the same `params` object is saved as a prompt further below.\n",
"params = CompletionCreateParamsBase(\n",
"    model=\"gpt-4o-mini\",\n",
"    temperature=0,\n",
"    messages=[\n",
"        {\"role\": \"system\", \"content\": \"You are a coding poet.\"},\n",
"        {\"role\": \"user\", \"content\": \"Write a haiku about recursion in programming.\"},\n",
"    ],\n",
")\n",
"resp = OpenAI().chat.completions.create(**params)\n",
"print(resp.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "5936b683",
"metadata": {},
"source": [
"We can save the prompt in Phoenix."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d8ccdb5",
"metadata": {},
"outputs": [],
"source": [
"# prompt identifier should contain only alphanumeric characters, hyphens or underscores\n",
"prompt_identifier = \"haiku-recursion\"\n",
"\n",
"# Convert the OpenAI request parameters into a prompt version, then create the prompt under the identifier.\n",
"prompt_version = PromptVersion.from_openai(params)\n",
"prompt = Client().prompts.create(\n",
"    name=prompt_identifier,\n",
"    prompt_description=\"Haiku about recursion in programming\",\n",
"    version=prompt_version,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "82a333e9",
"metadata": {},
"source": [
"We can fetch the prompt from Phoenix."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb045337",
"metadata": {},
"outputs": [],
"source": [
"# Look the prompt up by identifier; `format()` yields the keyword arguments passed on to OpenAI.\n",
"prompt = Client().prompts.get(prompt_identifier=prompt_identifier)\n",
"request_kwargs = prompt.format()\n",
"resp = OpenAI().chat.completions.create(**request_kwargs)\n",
"print(resp.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "6be49d30",
"metadata": {},
"source": [
"# Response Format"
]
},
{
"cell_type": "markdown",
"id": "464facea",
"metadata": {},
"source": [
"## Text summarization"
]
},
{
"cell_type": "markdown",
"id": "3a34ac64",
"metadata": {},
"source": [
"Based on [this example](https://colab.research.google.com/github/openai/openai-cookbook/blob/a3e98ea4dcf866b5e7a3cb7d63dccaa68c7d63aa/examples/Structured_Outputs_Intro.ipynb#scrollTo=5eae3aea) from OpenAI cookbook."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ff2b8e7",
"metadata": {},
"outputs": [],
"source": [
"summarization_prompt = \"\"\"\\\n",
" You will be provided with content from an article about an invention.\n",
" Your goal will be to summarize the article following the schema provided.\n",
" Here is a description of the parameters:\n",
" - invented_year: year in which the invention discussed in the article was invented\n",
" - summary: one sentence summary of what the invention is\n",
" - inventors: array of strings listing the inventor full names if present, otherwise just surname\n",
" - concepts: array of key concepts related to the invention, each concept containing a title and a description\n",
" - description: short description of the invention\n",
"\"\"\"\n",
"\n",
"\n",
"# Shape of the structured reply; it is converted to `response_format` right below.\n",
"class ArticleSummary(BaseModel):\n",
"    # One key concept: a title together with its description.\n",
"    class Concept(BaseModel):\n",
"        title: str\n",
"        description: str\n",
"\n",
"    invented_year: int\n",
"    summary: str\n",
"    inventors: list[str]\n",
"    description: str\n",
"    concepts: list[Concept]\n",
"\n",
"\n",
"response_format = type_to_response_format_param(ArticleSummary)\n",
"\n",
"# {{text}} is the template variable that is later filled with an article's text.\n",
"summarization_messages = [\n",
"    {\"role\": \"system\", \"content\": dedent(summarization_prompt)},\n",
"    {\"role\": \"user\", \"content\": \"{{text}}\"},\n",
"]\n",
"params = CompletionCreateParamsBase(\n",
"    model=\"gpt-4o-mini\",\n",
"    temperature=0.2,\n",
"    messages=summarization_messages,\n",
"    response_format=response_format,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0099b64e",
"metadata": {},
"source": [
"Save prompt in Phoenix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a412c09",
"metadata": {},
"outputs": [],
"source": [
"# prompt identifier should contain only alphanumeric characters, hyphens or underscores\n",
"prompt_identifier = \"summarize-invention-article\"\n",
"\n",
"# Convert the OpenAI request parameters into a prompt version, then create the prompt under the identifier.\n",
"prompt_version = PromptVersion.from_openai(params)\n",
"prompt = Client().prompts.create(\n",
"    name=prompt_identifier,\n",
"    prompt_description=\"Summarize an article about an invention\",\n",
"    version=prompt_version,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a17cef6",
"metadata": {},
"outputs": [],
"source": [
"src = \"https://raw.githubusercontent.com/openai/openai-cookbook/refs/heads/main/examples/data/structured_outputs_articles\"\n",
"\n",
"\n",
"def fetch_article(filename: str) -> dict[str, str]:\n",
"    \"\"\"Download one sample article and wrap it as the ``text`` template variable.\n",
"\n",
"    Raises ``requests.HTTPError`` on an error status so that an error page is\n",
"    never passed on as if it were an article. The timeout keeps a stalled\n",
"    connection from hanging the notebook.\n",
"    \"\"\"\n",
"    response = requests.get(f\"{src}/{filename}\", timeout=30)\n",
"    response.raise_for_status()\n",
"    return {\"text\": response.text}\n",
"\n",
"\n",
"articles = [fetch_article(f) for f in [\"cnns.md\", \"llms.md\", \"moe.md\"]]"
]
},
{
"cell_type": "markdown",
"id": "02176e93",
"metadata": {},
"source": [
"Fetch prompt from Phoenix and apply to articles"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "189f9181",
"metadata": {},
"outputs": [],
"source": [
"prompt = Client().prompts.get(prompt_identifier=prompt_identifier)\n",
"\n",
"\n",
"def get_response(input: dict[str, str]):\n",
"    \"\"\"Format the fetched prompt with ``input``, call OpenAI, and parse the reply content as JSON.\"\"\"\n",
"    request_kwargs = prompt.format(variables=input)\n",
"    completion = OpenAI().chat.completions.create(**request_kwargs)\n",
"    reply = completion.choices[0].message.content\n",
"    return json.loads(reply)\n",
"\n",
"\n",
"# Collect results into a DataFrame.\n",
"res = pd.json_normalize([get_response(article) for article in articles])\n",
"display(HTML(res.to_html()))"
]
},
{
"cell_type": "markdown",
"id": "032b13c0",
"metadata": {},
"source": [
"## UI generation"
]
},
{
"cell_type": "markdown",
"id": "c548692f",
"metadata": {},
"source": [
"Based on [this example](https://platform.openai.com/docs/guides/structured-outputs?example=ui-generation)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb31979e",
"metadata": {},
"outputs": [],
"source": [
"# Kinds of element a UI node may be.\n",
"class _UIType(str, Enum):\n",
"    div = \"div\"\n",
"    button = \"button\"\n",
"    header = \"header\"\n",
"    section = \"section\"\n",
"    field = \"field\"\n",
"    form = \"form\"\n",
"\n",
"\n",
"# A name/value pair attached to a UI node.\n",
"class _Attribute(BaseModel):\n",
"    name: str\n",
"    value: str\n",
"\n",
"\n",
"# Recursive UI node: `children` refers back to `_UI`, hence the string annotation.\n",
"class _UI(BaseModel):\n",
"    type: _UIType\n",
"    label: str\n",
"    children: list[\"_UI\"]\n",
"    attributes: list[_Attribute]\n",
"\n",
"\n",
"_UI.model_rebuild()\n",
"\n",
"# Wrap the tree in a `Response` model whose only field is `ui`.\n",
"response_model = create_model(\"Response\", ui=(_UI, ...))\n",
"response_format = type_to_response_format_param(response_model)\n",
"\n",
"ui_messages = [{\"role\": \"user\", \"content\": \"Generate form for {{feature}}\"}]\n",
"params = CompletionCreateParamsBase(\n",
"    messages=ui_messages,\n",
"    model=\"gpt-4o-mini\",\n",
"    response_format=response_format,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "6f8cab3f",
"metadata": {},
"source": [
"Save prompt in Phoenix."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d88ac8a",
"metadata": {},
"outputs": [],
"source": [
"# prompt identifier should contain only alphanumeric characters, hyphens or underscores\n",
"prompt_identifier = \"ui-generator\"\n",
"\n",
"# Only a name and a version are passed here; no description is given for this prompt.\n",
"prompt_version = PromptVersion.from_openai(params)\n",
"prompt = Client().prompts.create(name=prompt_identifier, version=prompt_version)"
]
},
{
"cell_type": "markdown",
"id": "133fd20d",
"metadata": {},
"source": [
"Fetch prompt from Phoenix."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d5396e9",
"metadata": {},
"outputs": [],
"source": [
"prompt = Client().prompts.get(prompt_identifier=prompt_identifier)\n",
"\n",
"# Fill the {{feature}} placeholder of the stored prompt before calling OpenAI.\n",
"variables = {\"feature\": \"user login\"}\n",
"request_kwargs = prompt.format(variables=variables)\n",
"resp = OpenAI().chat.completions.create(**request_kwargs)\n",
"print(resp.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "5d52a45a",
"metadata": {},
"source": [
"# Function Calling"
]
},
{
"cell_type": "markdown",
"id": "24cb2bb9",
"metadata": {},
"source": [
"## Entity extraction from user input"
]
},
{
"cell_type": "markdown",
"id": "b67e12a0",
"metadata": {},
"source": [
"Based on [this example](https://colab.research.google.com/github/openai/openai-cookbook/blob/a3e98ea4dcf866b5e7a3cb7d63dccaa68c7d63aa/examples/Structured_Outputs_Intro.ipynb#scrollTo=ee802699) from OpenAI cookbook."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7891b1d9",
"metadata": {},
"outputs": [],
"source": [
"product_search_prompt = \"\"\"\\\n",
" You are a clothes recommendation agent, specialized in finding the perfect match for a user.\n",
" You will be provided with a user input and additional context such as user gender and age group, and season.\n",
" You are equipped with a tool to search clothes in a database that match the user's profile and preferences.\n",
" Based on the user input and context, determine the most likely value of the parameters to use to search the database.\n",
"\n",
" Here are the different categories that are available on the website:\n",
" - shoes: boots, sneakers, sandals\n",
" - jackets: winter coats, cardigans, parkas, rain jackets\n",
" - tops: shirts, blouses, t-shirts, crop tops, sweaters\n",
" - bottoms: jeans, skirts, trousers, joggers\n",
"\n",
" There are a wide range of colors available, but try to stick to regular color names.\n",
"\"\"\"\n",
"\n",
"\n",
"# Arguments of the `product_search` tool; `category` is limited to the four values of `Category`.\n",
"class ProductSearchParameters(BaseModel):\n",
"    class Category(str, Enum):\n",
"        shoes = \"shoes\"\n",
"        jackets = \"jackets\"\n",
"        tops = \"tops\"\n",
"        bottoms = \"bottoms\"\n",
"\n",
"    category: Category\n",
"    subcategory: str\n",
"    color: str\n",
"\n",
"\n",
"# Expose the model above as a function tool named `product_search`.\n",
"product_search_tool = openai.pydantic_function_tool(\n",
"    ProductSearchParameters,\n",
"    name=\"product_search\",\n",
"    description=\"Search for a match in the product database\",\n",
")\n",
"\n",
"params = CompletionCreateParamsBase(\n",
"    model=\"gpt-4o-mini\",\n",
"    temperature=0,\n",
"    messages=[\n",
"        {\"role\": \"system\", \"content\": dedent(product_search_prompt)},\n",
"        {\"role\": \"user\", \"content\": \"CONTEXT: {{context}}\\n\\nUSER INPUT: {{user_input}}\"},\n",
"    ],\n",
"    tools=[product_search_tool],\n",
")"
]
},
{
"cell_type": "markdown",
"id": "d7ff2fef",
"metadata": {},
"source": [
"Save prompt in Phoenix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96c2f1bc",
"metadata": {},
"outputs": [],
"source": [
"# prompt identifier should contain only alphanumeric characters, hyphens or underscores\n",
"# Name and description match the product-search prompt held in `params`.\n",
"prompt_identifier = \"product-search\"\n",
"\n",
"prompt = Client().prompts.create(\n",
"    name=prompt_identifier,\n",
"    prompt_description=\"Extract product search parameters from user input\",\n",
"    version=PromptVersion.from_openai(params),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "03e16a84",
"metadata": {},
"source": [
"Define example inputs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f0095ac",
"metadata": {},
"outputs": [],
"source": [
"example_inputs = [\n",
" {\n",
" \"user_input\": \"I'm looking for a new coat. I'm always cold so please something warm! Ideally something that matches my eyes.\",\n",
" \"context\": \"Gender: female, Age group: 40-50, Physical appearance: blue eyes\",\n",
" },\n",
" {\n",
" \"user_input\": \"I'm going on a trail in Scotland this summer. It's goind to be rainy. Help me find something.\",\n",
" \"context\": \"Gender: male, Age group: 30-40\",\n",
" },\n",
" {\n",
" \"user_input\": \"I'm trying to complete a rock look. I'm missing shoes. Any suggestions?\",\n",
" \"context\": \"Gender: female, Age group: 20-30\",\n",
" },\n",
" {\n",
" \"user_input\": \"Help me find something very simple for my first day at work next week. Something casual and neutral.\",\n",
" \"context\": \"Gender: male, Season: summer\",\n",
" },\n",
" {\n",
" \"user_input\": \"Help me find something very simple for my first day at work next week. Something casual and neutral.\",\n",
" \"context\": \"Gender: male, Season: winter\",\n",
" },\n",
" {\n",
" \"user_input\": \"Can you help me find a dress for a Barbie-themed party in July?\",\n",
" \"context\": \"Gender: female, Age group: 20-30\",\n",
" },\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "876c7ac2",
"metadata": {},
"source": [
"Fetch prompt from Phoenix and apply to example inputs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da3be7e1",
"metadata": {},
"outputs": [],
"source": [
"prompt = Client().prompts.get(prompt_identifier=prompt_identifier)\n",
"\n",
"\n",
"def get_response(input: dict[str, str]):\n",
"    \"\"\"Run the fetched prompt for one example and return one row per tool call.\n",
"\n",
"    Each row merges the parsed tool-call arguments with the original ``input``.\n",
"    When the reply carries no tool calls (``tool_calls`` is ``None`` or empty),\n",
"    a single row holding only the inputs is returned, so the example still\n",
"    appears in the table instead of raising ``TypeError`` on iteration.\n",
"    \"\"\"\n",
"    response = OpenAI().chat.completions.create(**prompt.format(variables=input))\n",
"    tool_calls = response.choices[0].message.tool_calls\n",
"    if not tool_calls:\n",
"        return [dict(input)]\n",
"    return [{**json.loads(tc.function.arguments), **input} for tc in tool_calls]\n",
"\n",
"\n",
"# Collect results into a DataFrame.\n",
"res = pd.json_normalize(chain.from_iterable(map(get_response, example_inputs)))\n",
"res = res.set_index([\"user_input\", \"context\"])\n",
"display(HTML(res.to_html()))"
]
},
{
"cell_type": "markdown",
"id": "f18a9704",
"metadata": {},
"source": [
"# Reasoning"
]
},
{
"cell_type": "markdown",
"id": "4ce35114",
"metadata": {},
"source": [
"## Using Reasoning for Routine Generation"
]
},
{
"cell_type": "markdown",
"id": "af381d55",
"metadata": {},
"source": [
"Based on [this example](https://colab.research.google.com/github/openai/openai-cookbook/blob/a3e98ea4dcf866b5e7a3cb7d63dccaa68c7d63aa/examples/o1/Using_reasoning_for_routine_generation.ipynb#scrollTo=ReYoD4FaaPJg&line=8&uniqifier=1) from OpenAI cookbook"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79674d38",
"metadata": {},
"outputs": [],
"source": [
"CONVERSION_PROMPT = \"\"\"\\\n",
"You are a helpful assistant tasked with taking an external facing help center article and converting it into a internal-facing programmatically executable routine optimized for an LLM.\n",
"The LLM using this routine will be tasked with reading the policy, answering incoming questions from customers, and helping drive the case toward resolution.\n",
"\n",
"Please follow these instructions:\n",
"1. **Review the customer service policy carefully** to ensure every step is accounted for. It is crucial not to skip any steps or policies.\n",
"2. **Organize the instructions into a logical, step-by-step order**, using the specified format.\n",
"3. **Use the following format**:\n",
" - **Main actions are numbered** (e.g., 1, 2, 3).\n",
" - **Sub-actions are lettered** under their relevant main actions (e.g., 1a, 1b).\n",
" **Sub-actions should start on new lines**\n",
" - **Specify conditions using clear 'if...then...else' statements** (e.g., 'If the product was purchased within 30 days, then...').\n",
" - **For instructions that require more information from the customer**, provide polite and professional prompts to ask for additional information.\n",
" - **For actions that require data from external systems**, write a step to call a function using backticks for the function name (e.g., `call the check_delivery_date function`).\n",
" - **If a step requires the customer service agent to take an action** (e.g., process a refund), generate a function call for this action (e.g., `call the process_refund function`).\n",
" - **Define any new functions** by providing a brief description of their purpose and required parameters.\n",
" - **If there is an action an assistant can performon behalf of the user**, include a function call for this action (e.g., `call the change_email_address function`), and ensure the function is defined with its purpose and required parameters.\n",
" - This action may not be explicitly defined in the help center article, but can be done to help the user resolve their inquiry faster\n",
" - **The step prior to case resolution should always be to ask if there is anything more you can assist with**.\n",
" - **End with a final action for case resolution**: calling the `case_resolution` function should always be the final step.\n",
"4. **Ensure compliance** by making sure all steps adhere to company policies, privacy regulations, and legal requirements.\n",
"5. **Handle exceptions or escalations** by specifying steps for scenarios that fall outside the standard policy.\n",
"\n",
"**Important**: If at any point you are uncertain, respond with \"I don't know.\"\n",
"\n",
"Please convert the customer service policy into the formatted routine, ensuring it is easy to follow and execute programmatically.\\\n",
"\"\"\"\n",
"\n",
"# Both the conversion instructions and the {{content}} policy placeholder are sent as user messages.\n",
"conversion_messages = [\n",
"    {\"role\": \"user\", \"content\": dedent(CONVERSION_PROMPT)},\n",
"    {\"role\": \"user\", \"content\": \"POLICY:\\n\\n{{content}}\"},\n",
"]\n",
"params = CompletionCreateParamsBase(model=\"o3-mini\", messages=conversion_messages)"
]
},
{
"cell_type": "markdown",
"id": "711b50eb",
"metadata": {},
"source": [
"Save prompt in Phoenix."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "849c2502",
"metadata": {},
"outputs": [],
"source": [
"# prompt identifier should contain only alphanumeric characters, hyphens or underscores\n",
"prompt_identifier = \"convert-customer-service-policy\"\n",
"\n",
"# Convert the OpenAI request parameters into a prompt version, then create the prompt under the identifier.\n",
"prompt_version = PromptVersion.from_openai(params)\n",
"prompt = Client().prompts.create(\n",
"    name=prompt_identifier,\n",
"    prompt_description=\"Convert customer service policy into a routine\",\n",
"    version=prompt_version,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "df5efd0c",
"metadata": {},
"source": [
"Download articles."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddaa9636",
"metadata": {},
"outputs": [],
"source": [
"url = \"https://raw.githubusercontent.com/openai/openai-cookbook/a3e98ea4dcf866b5e7a3cb7d63dccaa68c7d63aa/examples/data/helpcenter_articles.csv\"\n",
"# A fixed random_state makes the same article come up on every Restart & Run All.\n",
"articles = pd.read_csv(url).sample(1, random_state=0)\n",
"display(HTML(articles.to_html()))"
]
},
{
"cell_type": "markdown",
"id": "097153fd",
"metadata": {},
"source": [
"Fetch prompt from Phoenix and apply to data."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "145ed932",
"metadata": {},
"outputs": [],
"source": [
"prompt = Client().prompts.get(prompt_identifier=prompt_identifier)\n",
"\n",
"\n",
"def process_article(input: dict[str, str]):\n",
"    \"\"\"Generate a routine for one article record and return it next to the record's policy and content.\"\"\"\n",
"    request_kwargs = prompt.format(variables=input)\n",
"    completion = OpenAI().chat.completions.create(**request_kwargs)\n",
"    record = {key: input[key] for key in (\"policy\", \"content\")}\n",
"    record[\"routine\"] = completion.choices[0].message.content\n",
"    return record\n",
"\n",
"\n",
"# Collect results into a DataFrame.\n",
"article_records = articles.to_dict(orient=\"records\")\n",
"res = pd.json_normalize([process_article(record) for record in article_records])\n",
"display(HTML(res.to_html()))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}