import json
from typing import Any

import pytest
from openinference.semconv.trace import (
DocumentAttributes,
MessageAttributes,
MessageContentAttributes,
OpenInferenceMimeTypeValues,
OpenInferenceSpanKindValues,
SpanAttributes,
ToolAttributes,
ToolCallAttributes,
)
from phoenix.db.models import Span
from phoenix.server.api.helpers.dataset_helpers import (
_get_message,
_merge_assistant_output_items,
get_dataset_example_input,
get_dataset_example_output,
get_experiment_example_output,
)
from phoenix.trace.attributes import unflatten
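
# The tests in this module exercise the span-to-dataset-example helpers in
# phoenix.server.api.helpers.dataset_helpers: turning a span's flattened
# OpenInference attributes into dataset example input/output dicts, normalizing
# individual messages (_get_message), merging consecutive assistant output items
# (_merge_assistant_output_items), and extracting experiment example outputs.
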
# DocumentAttributes
DOCUMENT_CONTENT = DocumentAttributes.DOCUMENT_CONTENT
DOCUMENT_ID = DocumentAttributes.DOCUMENT_ID
DOCUMENT_SCORE = DocumentAttributes.DOCUMENT_SCORE
# MessageAttributes
MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
MESSAGE_CONTENTS = MessageAttributes.MESSAGE_CONTENTS
MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON
MESSAGE_FUNCTION_CALL_NAME = MessageAttributes.MESSAGE_FUNCTION_CALL_NAME
MESSAGE_NAME = MessageAttributes.MESSAGE_NAME
MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS
# MessageContentAttributes (OpenAI Responses API output format)
MESSAGE_CONTENT_TEXT = MessageContentAttributes.MESSAGE_CONTENT_TEXT
# OpenInferenceMimeTypeValues
JSON = OpenInferenceMimeTypeValues.JSON.value
TEXT = OpenInferenceMimeTypeValues.TEXT.value
# OpenInferenceSpanKindValues
CHAIN = OpenInferenceSpanKindValues.CHAIN.value
LLM = OpenInferenceSpanKindValues.LLM.value
RETRIEVER = OpenInferenceSpanKindValues.RETRIEVER.value
# SpanAttributes
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
INPUT_VALUE = SpanAttributes.INPUT_VALUE
LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
LLM_PROMPT_TEMPLATE_VARIABLES = SpanAttributes.LLM_PROMPT_TEMPLATE_VARIABLES
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
# ToolCallAttributes
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME
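

# The cases below pin down the input-extraction precedence that
# get_dataset_example_input appears to follow (a summary inferred from the expected
# values, not a specification): LLM input messages, when present, are returned as
# "messages"; otherwise INPUT_VALUE is used, parsed into a dict for JSON mime types
# and wrapped as {"input": ...} for plain text. LLM_PROMPT_TEMPLATE_VARIABLES, when
# set, are merged into the result at the top level.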
@pytest.mark.parametrize(
"span, expected_input_value",
[
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(OPENINFERENCE_SPAN_KIND, LLM),
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(
LLM_PROMPT_TEMPLATE_VARIABLES,
json.dumps({"variable_name": "variable-value"}),
),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "123"),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_NAME}", "xyz"),
(f"{LLM_INPUT_MESSAGES}.1.{MESSAGE_CONTENT}", "user-message"),
(f"{LLM_INPUT_MESSAGES}.1.{MESSAGE_ROLE}", "user"),
(f"{LLM_INPUT_MESSAGES}.2.{MESSAGE_ROLE}", "assistant"),
(f"{LLM_INPUT_MESSAGES}.2.{MESSAGE_FUNCTION_CALL_NAME}", "add"),
(
f"{LLM_INPUT_MESSAGES}.2.{MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON}",
json.dumps({"a": 363, "b": 42}),
),
(f"{LLM_INPUT_MESSAGES}.3.{MESSAGE_CONTENT}", "user-message"),
(f"{LLM_INPUT_MESSAGES}.3.{MESSAGE_ROLE}", "user"),
(f"{LLM_INPUT_MESSAGES}.4.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_INPUT_MESSAGES}.4.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_NAME}",
"multiply",
),
(
f"{LLM_INPUT_MESSAGES}.4.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
json.dumps({"a": 121, "b": 3}),
),
(
f"{LLM_INPUT_MESSAGES}.4.{MESSAGE_TOOL_CALLS}.1.{TOOL_CALL_FUNCTION_NAME}",
"add",
),
(
f"{LLM_INPUT_MESSAGES}.4.{MESSAGE_TOOL_CALLS}.1.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
json.dumps({"a": 363, "b": 42}),
),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{
"messages": [
{"content": "123", "role": "assistant", "name": "xyz"},
{"content": "user-message", "role": "user"},
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "add", "arguments": {"a": 363, "b": 42}}},
],
},
{"content": "user-message", "role": "user"},
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "multiply", "arguments": {"a": 121, "b": 3}}},
{"function": {"name": "add", "arguments": {"a": 363, "b": 42}}},
],
},
],
"variable_name": "variable-value",
},
id="llm-span-with-input-messages-and-prompt-template-variables",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "user-message"),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_ROLE}", "user"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{
"messages": [{"content": "user-message", "role": "user"}],
},
id="llm-span-with-input-messages-and-no-prompt-template-variables",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{"input": "plain-text-input"},
id="llm-span-with-no-input-messages-and-plain-text-input",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(
LLM_PROMPT_TEMPLATE_VARIABLES,
json.dumps({"variable_name": "variable-value"}),
),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{
"input": "plain-text-input",
"variable_name": "variable-value",
},
id="llm-span-with-no-input-messages-and-plain-text-input-with-prompt-template-variables",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"llm-span-input": "llm-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{"llm-span-input": "llm-input"},
id="llm-span-with-no-input-messages-and-json-input",
),
pytest.param(
Span(
span_kind="CHAIN",
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{"input": "plain-text-input"},
id="chain-span-with-plain-text-input",
),
pytest.param(
Span(
span_kind="CHAIN",
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"chain_input": "chain-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{"chain_input": "chain-input"},
id="chain-span-with-json-input",
),
],
)
def test_get_dataset_example_input(span: Span, expected_input_value: dict[str, Any]) -> None:
input_value = get_dataset_example_input(span)
assert expected_input_value == input_value
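

# Based on the cases below, _get_message appears to normalize a single flattened
# OpenInference message into an OpenAI-style dict: role, content, and name are
# copied over, a legacy function_call is surfaced as a one-element "tool_calls"
# list, tool-call arguments are JSON-decoded when they parse and left as raw
# strings otherwise, and empty or missing tool_calls are omitted entirely.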
@pytest.mark.parametrize(
"message, expected",
[
pytest.param(
unflatten([(MESSAGE_ROLE, "user")]),
{"role": "user"},
id="role_only",
),
pytest.param(
unflatten(
[
(MESSAGE_ROLE, "assistant"),
(MESSAGE_CONTENT, "hi"),
(MESSAGE_NAME, "bot"),
]
),
{"role": "assistant", "content": "hi", "name": "bot"},
id="content_and_name",
),
pytest.param(
unflatten(
[
(MESSAGE_ROLE, "assistant"),
(MESSAGE_FUNCTION_CALL_NAME, "add"),
(MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, json.dumps({"a": 1, "b": 2})),
]
),
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "add", "arguments": {"a": 1, "b": 2}}},
],
},
id="function_call_json_string_deserialized",
),
pytest.param(
unflatten(
[
(MESSAGE_ROLE, "assistant"),
(MESSAGE_FUNCTION_CALL_NAME, "add"),
(MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, "not valid json"),
]
),
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "add", "arguments": "not valid json"}},
],
},
id="function_call_invalid_json_left_as_string",
),
pytest.param(
unflatten(
[
(MESSAGE_ROLE, "assistant"),
(
MESSAGE_TOOL_CALLS,
[
unflatten(
[
(TOOL_CALL_FUNCTION_NAME, "multiply"),
(
TOOL_CALL_FUNCTION_ARGUMENTS_JSON,
json.dumps({"x": 10}),
),
]
),
],
),
]
),
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "multiply", "arguments": {"x": 10}}},
],
},
id="tool_calls_json_string_deserialized",
),
pytest.param(
unflatten(
[
(MESSAGE_ROLE, "assistant"),
(
MESSAGE_TOOL_CALLS,
[
unflatten(
[
(TOOL_CALL_FUNCTION_NAME, "run"),
(
TOOL_CALL_FUNCTION_ARGUMENTS_JSON,
"{broken",
),
]
),
],
),
]
),
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "run", "arguments": "{broken"}},
],
},
id="tool_calls_invalid_json_left_as_string",
),
pytest.param(
unflatten([(MESSAGE_ROLE, "user"), (MESSAGE_TOOL_CALLS, [])]),
{"role": "user"},
id="empty_tool_calls_omitted",
),
pytest.param(
unflatten([(MESSAGE_ROLE, "user"), (MESSAGE_TOOL_CALLS, None)]),
{"role": "user"},
id="none_tool_calls_omitted",
),
],
)
def test_get_message(message: dict[str, Any], expected: dict[str, Any]) -> None:
assert _get_message(message) == expected
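

# A minimal sketch of the merge rule the cases below pin down; the real
# _merge_assistant_output_items lives in dataset_helpers and may differ in detail.
# It is included here purely as documentation of the expected behavior: consecutive
# assistant items are folded into one message, adopting the first non-empty content
# and concatenating tool_calls in order.
def _merge_rule_sketch(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    merged: list[dict[str, Any]] = []
    for item in items:
        previous = merged[-1] if merged else None
        if previous is not None and previous.get("role") == item.get("role") == "assistant":
            # Fold this assistant item into the previous one.
            if "content" in item and "content" not in previous:
                previous["content"] = item["content"]
            if item.get("tool_calls"):
                previous["tool_calls"] = [*previous.get("tool_calls", []), *item["tool_calls"]]
        else:
            merged.append(dict(item))
    return merged
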
@pytest.mark.parametrize(
"raw, expected",
[
pytest.param(
[],
[],
id="empty_input",
),
pytest.param(
[{"role": "assistant", "content": "hello"}],
[{"role": "assistant", "content": "hello"}],
id="single_assistant_passthrough",
),
pytest.param(
[{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hey"}],
[{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hey"}],
id="non_assistant_not_merged",
),
pytest.param(
[
{"role": "assistant", "content": "I will call the tool."},
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "get_weather", "arguments": {"loc": "SF"}}}
],
},
],
[
{
"role": "assistant",
"content": "I will call the tool.",
"tool_calls": [
{"function": {"name": "get_weather", "arguments": {"loc": "SF"}}}
],
}
],
id="content_and_tool_call_merged",
),
pytest.param(
[
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "tool_a", "arguments": {}}, "id": "c1"},
],
},
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "tool_b", "arguments": {}}, "id": "c2"},
],
},
],
[
{
"role": "assistant",
"tool_calls": [
{"function": {"name": "tool_a", "arguments": {}}, "id": "c1"},
{"function": {"name": "tool_b", "arguments": {}}, "id": "c2"},
],
}
],
id="multiple_tool_calls_merged",
),
pytest.param(
[
{"role": "user", "content": "question"},
{"role": "assistant", "content": "answer"},
{
"role": "assistant",
"tool_calls": [{"function": {"name": "search", "arguments": {"q": "x"}}}],
},
{"role": "user", "content": "follow-up"},
{"role": "assistant", "content": "done"},
],
[
{"role": "user", "content": "question"},
{
"role": "assistant",
"content": "answer",
"tool_calls": [{"function": {"name": "search", "arguments": {"q": "x"}}}],
},
{"role": "user", "content": "follow-up"},
{"role": "assistant", "content": "done"},
],
id="interleaved_roles_merge_only_within_runs",
),
pytest.param(
[
{
"role": "assistant",
"tool_calls": [{"function": {"name": "fn", "arguments": {}}}],
},
],
[
{
"role": "assistant",
"tool_calls": [{"function": {"name": "fn", "arguments": {}}}],
},
],
id="tool_calls_only_no_content",
),
pytest.param(
[{"role": "assistant"}, {"role": "assistant"}],
[{"role": "assistant"}],
id="consecutive_empty_assistants_merged",
),
pytest.param(
[
{"role": "assistant"},
{"role": "assistant", "content": "late content"},
],
[{"role": "assistant", "content": "late content"}],
id="content_from_later_item_adopted",
),
],
)
def test_merge_assistant_output_items(
raw: list[dict[str, Any]], expected: list[dict[str, Any]]
) -> None:
assert _merge_assistant_output_items(raw) == expected # type: ignore[arg-type]
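

# The cases below cover the output-extraction precedence that
# get_dataset_example_output appears to follow (inferred from the expected values):
# LLM output messages take priority, with Responses-API-style message contents
# joined by a blank line and consecutive assistant items merged into one message;
# retriever spans fall back to RETRIEVAL_DOCUMENTS as "documents"; otherwise
# OUTPUT_VALUE is parsed into a dict for JSON mime types or wrapped as
# {"output": ...} for plain text.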
@pytest.mark.parametrize(
"span, expected_output_value",
[
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(
LLM_PROMPT_TEMPLATE_VARIABLES,
json.dumps({"variable_name": "variable-value"}),
),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "user-message"),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_ROLE}", "user"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "assistant-message"),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{"messages": [{"content": "assistant-message", "role": "assistant"}]},
id="llm-span-with-output-messages",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "user-message"),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_ROLE}", "user"),
# OpenAI Responses API stores output under message.contents.*.message_content.text
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENTS}.0.{MESSAGE_CONTENT_TEXT}",
"Responses API output text",
),
)
),
),
{"messages": [{"content": "Responses API output text", "role": "assistant"}]},
id="llm-span-with-output-messages-responses-api-format",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENTS}.0.{MESSAGE_CONTENT_TEXT}",
"First block",
),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENTS}.1.{MESSAGE_CONTENT_TEXT}",
"Second block",
),
)
),
),
{"messages": [{"content": "First block\n\nSecond block", "role": "assistant"}]},
id="llm-span-with-output-messages-responses-api-format-multiple-parts",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "user-message"),
(f"{LLM_INPUT_MESSAGES}.0.{MESSAGE_ROLE}", "user"),
)
),
),
{"output": "plain-text-output"},
id="llm-span-with-no-output-messages-but-with-plain-text-output",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(INPUT_VALUE, "plain-text-input"),
(INPUT_MIME_TYPE, TEXT),
(OUTPUT_VALUE, json.dumps({"llm-span-output": "value"})),
(OUTPUT_MIME_TYPE, JSON),
)
),
),
{"llm-span-output": "value"},
id="llm-span-with-no-output-messages-and-json-output",
),
pytest.param(
Span(
span_kind=RETRIEVER,
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"retriever-input": "retriever-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(f"{RETRIEVAL_DOCUMENTS}.0.{DOCUMENT_ID}", "1"),
(f"{RETRIEVAL_DOCUMENTS}.0.{DOCUMENT_SCORE}", 0.5),
(f"{RETRIEVAL_DOCUMENTS}.0.{DOCUMENT_CONTENT}", "document-content"),
)
),
),
{"documents": [{"id": "1", "score": 0.5, "content": "document-content"}]},
id="retriever-span-with-retrieval-documents",
),
pytest.param(
Span(
span_kind=RETRIEVER,
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"retriever-input": "retriever-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
)
),
),
{"output": "plain-text-output"},
id="retriever-span-with-plain-text-output-and-no-retrieval-documents",
),
pytest.param(
Span(
span_kind=RETRIEVER,
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"retriever-input": "retriever-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, json.dumps({"retriever_output": "retriever-output"})),
(OUTPUT_MIME_TYPE, JSON),
)
),
),
{"retriever_output": "retriever-output"},
id="retriever-span-with-json-output-and-no-retrieval-documents",
),
pytest.param(
Span(
span_kind=CHAIN,
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"chain_input": "chain-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
)
),
),
{"output": "plain-text-output"},
id="chain-span-with-plain-text-output",
),
pytest.param(
Span(
span_kind=CHAIN,
attributes=unflatten(
(
(INPUT_VALUE, json.dumps({"chain_input": "chain-input"})),
(INPUT_MIME_TYPE, JSON),
(OUTPUT_VALUE, json.dumps({"chain_output": "chain-output"})),
(OUTPUT_MIME_TYPE, JSON),
)
),
),
{"chain_output": "chain-output"},
id="chain-span-with-json-output",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "No tools here."),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{"messages": [{"content": "No tools here.", "role": "assistant"}]},
id="llm-span-without-tools",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "Calling get_weather."),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_NAME}",
"get_weather",
),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
'{"location": "SF"}',
),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.1.{TOOL_CALL_FUNCTION_NAME}",
"get_time",
),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.1.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
"{}",
),
)
),
),
{
"messages": [
{
"content": "Calling get_weather.",
"role": "assistant",
"tool_calls": [
{
"function": {
"name": "get_weather",
"arguments": {"location": "SF"},
},
},
{
"function": {
"name": "get_time",
"arguments": {},
},
},
],
}
]
},
id="llm-span-with-output-messages-and-tool-calls-including-id",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
# Responses API: first item = message with content
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENTS}.0.{MESSAGE_CONTENT_TEXT}",
"I will call the tool.",
),
# Second item = function_call (single tool at MESSAGE_TOOL_CALLS.0)
(f"{LLM_OUTPUT_MESSAGES}.1.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_OUTPUT_MESSAGES}.1.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_NAME}",
"fetch_data",
),
(
f"{LLM_OUTPUT_MESSAGES}.1.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
'{"q": "x"}',
),
# Third item = another function_call
(f"{LLM_OUTPUT_MESSAGES}.2.{MESSAGE_ROLE}", "assistant"),
(
f"{LLM_OUTPUT_MESSAGES}.2.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_NAME}",
"other_tool",
),
(
f"{LLM_OUTPUT_MESSAGES}.2.{MESSAGE_TOOL_CALLS}.0.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
"{}",
),
)
),
),
{
"messages": [
{
"content": "I will call the tool.",
"role": "assistant",
"tool_calls": [
{
"function": {
"name": "fetch_data",
"arguments": {"q": "x"},
},
},
{
"function": {
"name": "other_tool",
"arguments": {},
},
},
],
}
]
},
id="llm-span-responses-api-merged-message-and-tool-calls",
),
],
)
def test_get_dataset_example_output(span: Span, expected_output_value: dict[str, Any]) -> None:
output_value = get_dataset_example_output(span)
assert expected_output_value == output_value
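

# Per the cases below, get_experiment_example_output appears to apply the same
# output extraction but always adds an "available_tools" list parsed from the
# LLM_TOOLS tool JSON schemas (an empty list when the span declares no tools).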
@pytest.mark.parametrize(
"span, expected_output_value",
[
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "I can help with weather."),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(
f"{SpanAttributes.LLM_TOOLS}.0.{ToolAttributes.TOOL_JSON_SCHEMA}",
json.dumps(
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
"required": ["location"],
},
},
}
),
),
)
),
),
{
"messages": [{"content": "I can help with weather.", "role": "assistant"}],
"available_tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
"required": ["location"],
},
},
}
],
},
id="llm-span-with-output-messages-and-single-tool",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "I have multiple tools."),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
(
f"{SpanAttributes.LLM_TOOLS}.0.{ToolAttributes.TOOL_JSON_SCHEMA}",
json.dumps(
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {"type": "object"},
},
}
),
),
(
f"{SpanAttributes.LLM_TOOLS}.1.{ToolAttributes.TOOL_JSON_SCHEMA}",
json.dumps(
{
"type": "function",
"function": {
"name": "send_email",
"parameters": {"type": "object"},
},
}
),
),
)
),
),
{
"messages": [{"content": "I have multiple tools.", "role": "assistant"}],
"available_tools": [
{
"type": "function",
"function": {"name": "get_weather", "parameters": {"type": "object"}},
},
{
"type": "function",
"function": {"name": "send_email", "parameters": {"type": "object"}},
},
],
},
id="llm-span-with-output-messages-and-multiple-tools",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", "No tools here."),
(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"),
)
),
),
{
"messages": [{"content": "No tools here.", "role": "assistant"}],
"available_tools": [],
},
id="llm-span-without-tools",
),
pytest.param(
Span(
span_kind=LLM,
attributes=unflatten(
(
(OUTPUT_VALUE, "plain-text-output"),
(OUTPUT_MIME_TYPE, TEXT),
(
f"{SpanAttributes.LLM_TOOLS}.0.{ToolAttributes.TOOL_JSON_SCHEMA}",
json.dumps(
{
"type": "function",
"function": {
"name": "calculator",
"parameters": {"type": "object"},
},
}
),
),
)
),
),
{
"output": "plain-text-output",
"available_tools": [
{
"type": "function",
"function": {"name": "calculator", "parameters": {"type": "object"}},
}
],
},
id="llm-span-with-plain-text-output-and-tools",
),
],
)
def test_get_experiment_example_output(span: Span, expected_output_value: dict[str, Any]) -> None:
output_value = get_experiment_example_output(span)
assert expected_output_value == output_value