Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
test_annotation_helpers.py (32.2 kB)
# pyright: reportPrivateUsage=false
"""Tests for the private annotation-helper utilities in
``phoenix.client.utils.annotation_helpers``.

Covers three areas:

* DataFrame validation (required ID columns, index-based IDs, fallback
  columns, annotator-kind checks) for span / trace / document annotations.
* DataFrame chunking (chunk sizing, global parameter overrides, ID
  extraction from columns, named indexes, and MultiIndexes, type coercion).
* Direct annotation-dict creation (`_create_span_annotation` /
  `_create_document_annotation`), including whitespace and identifier
  edge cases.
"""

import pandas as pd
import pytest

from phoenix.client.utils.annotation_helpers import (
    _DOCUMENT_ID_CONFIG,
    _SPAN_ID_CONFIG,
    _chunk_annotations_dataframe,
    _chunk_document_annotations_dataframe,
    _chunk_span_annotations_dataframe,
    _create_document_annotation,
    _create_span_annotation,
    _validate_document_annotations_dataframe,
    _validate_span_annotations_dataframe,
    _validate_trace_annotations_dataframe,
)


class TestAnnotationDataFrameValidation:
    """Test suite for annotation DataFrame validation across different ID configurations."""

    def test_basic_validation_errors(self) -> None:
        """Test fundamental validation failures that apply to all annotation types."""
        # Empty DataFrame
        with pytest.raises(ValueError, match="DataFrame cannot be empty"):
            _validate_span_annotations_dataframe(dataframe=pd.DataFrame())

        # Non-pandas input
        with pytest.raises(TypeError, match="Expected pandas DataFrame"):
            _validate_span_annotations_dataframe(dataframe="not a dataframe")  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]

        # Missing required columns (no ID columns)
        df = pd.DataFrame({"label": ["positive"]})
        with pytest.raises(
            ValueError, match="DataFrame must have.*column, index level, or a string-based index"
        ):
            _validate_span_annotations_dataframe(dataframe=df)

    def test_annotation_column_conflicts(self) -> None:
        """Test validation of conflicting annotation columns."""
        # Both name and annotation_name present
        df = pd.DataFrame(
            {
                "name": ["sentiment"],
                "annotation_name": ["sentiment"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["id1"],
                "label": ["positive"],
            }
        )
        with pytest.raises(
            ValueError, match="DataFrame cannot have both 'name' and 'annotation_name' columns"
        ):
            _validate_span_annotations_dataframe(dataframe=df)

    def test_id_column_scenarios(self) -> None:
        """Test critical ID column scenarios: missing, fallback, conflicts."""
        # Missing span_id (should use fallback)
        df = pd.DataFrame(
            {
                "name": ["sentiment"],
                "annotator_kind": ["HUMAN"],
                "context.span_id": ["id1"],
                "label": ["positive"],
            }
        )
        _validate_span_annotations_dataframe(dataframe=df)  # Should pass with fallback

        # Both primary and fallback present (conflict)
        df_conflict = pd.DataFrame(
            {
                "name": ["sentiment"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["id1"],
                "context.span_id": ["id2"],
                "label": ["positive"],
            }
        )
        with pytest.raises(
            ValueError, match="DataFrame cannot have both primary and fallback ID columns"
        ):
            _validate_span_annotations_dataframe(dataframe=df_conflict)

    def test_span_document_validation(self) -> None:
        """Test span document specific validation with multiple ID columns."""
        # Valid span document DataFrame
        df = pd.DataFrame(
            {
                "name": ["relevance"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["span1"],
                "document_position": [0],
                "label": ["relevant"],
            }
        )
        _validate_document_annotations_dataframe(dataframe=df)  # Should pass

        # Missing span_id (should fail - ALL ID columns required for multi-ID configs)
        df_missing_span = pd.DataFrame(
            {
                "name": ["relevance"],
                "annotator_kind": ["HUMAN"],
                "document_position": [0],
                "label": ["relevant"],
            }
        )
        with pytest.raises(ValueError, match="DataFrame must have ALL required ID columns"):
            _validate_document_annotations_dataframe(dataframe=df_missing_span)

        # Missing document_position (should also fail)
        df_missing_doc = pd.DataFrame(
            {
                "name": ["relevance"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["span1"],
                "label": ["relevant"],
            }
        )
        with pytest.raises(ValueError, match="DataFrame must have ALL required ID columns"):
            _validate_document_annotations_dataframe(dataframe=df_missing_doc)

    def test_global_parameter_validation(self) -> None:
        """Test validation when fields are required vs optional in DataFrame."""
        df_with_fields = pd.DataFrame(
            {
                "span_id": ["id1"],
                "name": ["sentiment"],
                "annotator_kind": ["HUMAN"],
                "label": ["positive"],
            }
        )
        df_without_fields = pd.DataFrame({"span_id": ["id1"], "label": ["positive"]})

        # Fields not required - should pass even without name/kind columns
        _validate_span_annotations_dataframe(dataframe=df_without_fields)

        # Fields required - should fail without name/kind columns
        with pytest.raises(
            ValueError,
            match="DataFrame must contain either 'name' or 'annotation_name' column when annotation_name_required=True",
        ):
            _validate_span_annotations_dataframe(
                dataframe=df_without_fields, annotation_name_required=True
            )

        # Fields required - should pass with name/kind columns
        _validate_span_annotations_dataframe(
            dataframe=df_with_fields, annotation_name_required=True, annotator_kind_required=True
        )

    def test_index_based_id_validation(self) -> None:
        """Test validation for ID columns provided via index instead of columns."""
        # Single ID in named index should pass
        df_single_index = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "label": ["positive"],
            },
            index=pd.Index(["span1"], name="span_id"),
        )
        _validate_span_annotations_dataframe(dataframe=df_single_index)  # Should pass

        # Multi-ID in MultiIndex should pass
        multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", 0)], names=["span_id", "document_position"]
        )
        df_multi_index = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "label": ["positive"],
            },
            index=multi_index,
        )
        _validate_document_annotations_dataframe(dataframe=df_multi_index)  # Should pass

        # Missing ID level in MultiIndex should fail
        incomplete_multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", "extra")],
            names=["span_id", "extra_level"],  # Missing document_position
        )
        df_incomplete_index = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "label": ["positive"],
            },
            index=incomplete_multi_index,
        )
        with pytest.raises(ValueError, match="DataFrame must have ALL required ID columns"):
            _validate_document_annotations_dataframe(dataframe=df_incomplete_index)


class TestAnnotationDataFrameChunking:
    """Test suite for annotation DataFrame chunking across different ID configurations."""

    def test_single_vs_multiple_chunks(self) -> None:
        """Test chunking behavior with different DataFrame sizes."""
        # Single row -> single chunk
        df_small = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["id1"],
                "label": ["positive"],
            }
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df_small,
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        assert len(chunks) == 1
        assert len(chunks[0]) == 1

        # 5 rows with chunk_size=2 -> 3 chunks (2, 2, 1)
        df_multi = pd.DataFrame(
            {
                "name": [f"test{i}" for i in range(5)],
                "annotator_kind": ["HUMAN"] * 5,
                "span_id": [f"id{i}" for i in range(5)],
                "label": [f"label{i}" for i in range(5)],
            }
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df_multi,
                chunk_size=2,
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        assert len(chunks) == 3
        assert [len(chunk) for chunk in chunks] == [2, 2, 1]

    def test_global_parameters(self) -> None:
        """Test chunking with global annotation name and annotator kind."""
        df = pd.DataFrame({"span_id": ["id1", "id2"], "label": ["positive", "negative"]})
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                annotation_name="sentiment",
                annotator_kind="HUMAN",
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        # Verify global parameters are applied
        annotation = chunks[0][0]
        assert annotation["name"] == "sentiment"
        assert annotation["annotator_kind"] == "HUMAN"

    def test_multi_id_extraction(self) -> None:
        """Test extraction of multiple ID parameters (span_id + document_position)."""
        df = pd.DataFrame(
            {
                "name": ["relevance"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["span1"],
                "document_position": [0],  # Critical: test that 0 is handled correctly
                "label": ["relevant"],
            }
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_DOCUMENT_ID_CONFIG,
                annotation_factory=_create_document_annotation,
            )
        )
        annotation = chunks[0][0]
        assert annotation["span_id"] == "span1"
        assert annotation["document_position"] == 0  # Ensure 0 is preserved

    def test_type_conversion(self) -> None:
        """Test proper type conversion for different ID column types."""
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["span1"],
                "document_position": ["2"],  # String that should convert to int
                "label": ["test"],
            }
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_DOCUMENT_ID_CONFIG,
                annotation_factory=_create_document_annotation,
            )
        )
        annotation = chunks[0][0]
        assert annotation["span_id"] == "span1"
        assert annotation["document_position"] == 2
        assert isinstance(annotation["document_position"], int)

    def test_fallback_column_usage(self) -> None:
        """Test that fallback columns are used when primary columns are missing."""
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "context.span_id": ["span1"],  # Using fallback column
                "label": ["positive"],
            }
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        annotation = chunks[0][0]
        assert annotation["span_id"] == "span1"

    def test_index_fallback(self) -> None:
        """Test using DataFrame index when no ID columns are available."""
        df = pd.DataFrame(
            {"name": ["test"], "annotator_kind": ["HUMAN"], "label": ["positive"]}, index=["span1"]
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        annotation = chunks[0][0]
        assert annotation["span_id"] == "span1"

    def test_named_index_for_single_id(self) -> None:
        """Test using a named index for single ID column extraction."""
        # Create DataFrame with named index matching the ID column name
        df = pd.DataFrame(
            {"name": ["test"], "annotator_kind": ["HUMAN"], "label": ["positive"]},
            index=pd.Index(["span1"], name="span_id"),
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        annotation = chunks[0][0]
        assert annotation["span_id"] == "span1"

    def test_multiindex_for_document_annotations(self) -> None:
        """Test using MultiIndex for document annotations with both span_id and document_position."""
        # Create DataFrame with MultiIndex containing both required ID columns
        multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", 0), ("span1", 1), ("span2", 0)], names=["span_id", "document_position"]
        )
        df = pd.DataFrame(
            {
                "name": ["relevance", "accuracy", "completeness"],
                "annotator_kind": ["HUMAN", "LLM", "CODE"],
                "label": ["relevant", "accurate", "complete"],
            },
            index=multi_index,
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_DOCUMENT_ID_CONFIG,
                annotation_factory=_create_document_annotation,
            )
        )
        # Check all three annotations were created correctly
        assert len(chunks) == 1  # All fit in one chunk
        annotations = chunks[0]
        assert len(annotations) == 3

        # First annotation
        assert annotations[0]["span_id"] == "span1"
        assert annotations[0]["document_position"] == 0
        assert annotations[0]["name"] == "relevance"

        # Second annotation
        assert annotations[1]["span_id"] == "span1"
        assert annotations[1]["document_position"] == 1
        assert annotations[1]["name"] == "accuracy"

        # Third annotation
        assert annotations[2]["span_id"] == "span2"
        assert annotations[2]["document_position"] == 0
        assert annotations[2]["name"] == "completeness"

    def test_multiindex_validation_for_document_annotations(self) -> None:
        """Test validation passes for MultiIndex with document annotation ID columns."""
        # Create DataFrame with MultiIndex containing both required ID columns
        multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", 0), ("span2", 1)], names=["span_id", "document_position"]
        )
        df = pd.DataFrame(
            {
                "name": ["relevance", "accuracy"],
                "annotator_kind": ["HUMAN", "LLM"],
                "label": ["relevant", "accurate"],
            },
            index=multi_index,
        )
        # This should pass validation
        _validate_document_annotations_dataframe(dataframe=df)

    def test_partial_index_partial_columns_error(self) -> None:
        """Test that having some ID columns in index and others in columns raises an error."""
        # Create DataFrame with span_id in index but document_position in column
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "document_position": [0],  # This is in a column
                "label": ["positive"],
            },
            index=pd.Index(["span1"], name="span_id"),  # This is in the index
        )
        # This should fail - mixing index and columns for multi-ID config
        with pytest.raises(
            ValueError,
            match="For multi-ID configurations, all ID columns must be in the same location",
        ):
            _validate_document_annotations_dataframe(dataframe=df)

    def test_multiindex_wrong_names_error(self) -> None:
        """Test that MultiIndex with wrong level names fails validation."""
        # Create MultiIndex with incorrect names
        multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", 0)],
            names=["wrong_name", "document_position"],  # span_id is named incorrectly
        )
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "label": ["positive"],
            },
            index=multi_index,
        )
        # This should fail validation
        with pytest.raises(ValueError, match="DataFrame must have ALL required ID columns"):
            _validate_document_annotations_dataframe(dataframe=df)

    def test_multiindex_incomplete_id_columns_error(self) -> None:
        """Test that MultiIndex missing required ID columns fails validation."""
        # Create MultiIndex with only one of the required columns
        multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", "extra_data")],
            names=["span_id", "extra_column"],  # Missing document_position
        )
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "label": ["positive"],
            },
            index=multi_index,
        )
        # This should fail validation
        with pytest.raises(ValueError, match="DataFrame must have ALL required ID columns"):
            _validate_document_annotations_dataframe(dataframe=df)

    def test_mixed_index_column_types(self) -> None:
        """Test proper type conversion when extracting from MultiIndex."""
        # Create MultiIndex with string document_position that should convert to int
        multi_index = pd.MultiIndex.from_tuples(  # pyright: ignore[reportUnknownMemberType]
            [("span1", "2"), ("span2", "0")],  # document_position as string
            names=["span_id", "document_position"],
        )
        df = pd.DataFrame(
            {
                "name": ["test1", "test2"],
                "annotator_kind": ["HUMAN", "LLM"],
                "label": ["positive", "negative"],
            },
            index=multi_index,
        )
        chunks = list(
            _chunk_annotations_dataframe(
                dataframe=df,
                id_config=_DOCUMENT_ID_CONFIG,
                annotation_factory=_create_document_annotation,
            )
        )
        annotations = chunks[0]

        # Check type conversion worked
        assert annotations[0]["document_position"] == 2
        assert isinstance(annotations[0]["document_position"], int)
        assert annotations[1]["document_position"] == 0
        assert isinstance(annotations[1]["document_position"], int)

    def test_annotation_type_differences(self) -> None:
        """Test differences between span, trace, and span-document annotations."""
        # Span annotation
        span_df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["span1"],
                "label": ["positive"],
            }
        )
        span_chunks = list(
            _chunk_annotations_dataframe(
                dataframe=span_df,
                id_config=_SPAN_ID_CONFIG,
                annotation_factory=_create_span_annotation,
            )
        )
        assert span_chunks[0][0]["span_id"] == "span1"

        # Span document annotation
        doc_df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["span1"],
                "document_position": [1],
                "label": ["positive"],
            }
        )
        doc_chunks = list(
            _chunk_annotations_dataframe(
                dataframe=doc_df,
                id_config=_DOCUMENT_ID_CONFIG,
                annotation_factory=_create_document_annotation,
            )
        )
        annotation = doc_chunks[0][0]
        assert annotation["span_id"] == "span1"
        assert annotation["document_position"] == 1

    def test_error_handling(self) -> None:
        """Test critical error scenarios and edge cases."""
        # Invalid score type conversion
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "span_id": ["id1"],
                "score": ["not_a_number"],
            }
        )
        with pytest.raises(
            ValueError, match="Score value 'not_a_number' cannot be converted to float"
        ):
            list(
                _chunk_annotations_dataframe(
                    dataframe=df,
                    id_config=_SPAN_ID_CONFIG,
                    annotation_factory=_create_span_annotation,
                )
            )

        # Missing required ID column in multi-ID config should fail during chunking
        df_missing_span = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["HUMAN"],
                "document_position": [0],  # Missing span_id
                "label": ["positive"],
            }
        )
        with pytest.raises(ValueError, match="Row 0: Missing required ID columns"):
            list(
                _chunk_annotations_dataframe(
                    dataframe=df_missing_span,
                    id_config=_DOCUMENT_ID_CONFIG,
                    annotation_factory=_create_document_annotation,
                )
            )

    def test_invalid_annotator_kinds(self) -> None:
        """Test validation of annotator_kind values."""
        # Invalid annotator_kind in DataFrame
        df = pd.DataFrame(
            {
                "name": ["test"],
                "annotator_kind": ["INVALID_KIND"],
                "span_id": ["id1"],
                "label": ["positive"],
            }
        )
        with pytest.raises(ValueError, match="Invalid annotator_kind values.*INVALID_KIND"):
            _validate_span_annotations_dataframe(dataframe=df)

    def test_edge_case_values(self) -> None:
        """Test handling of edge case values that could cause issues."""
        # Empty strings and whitespace handling
        df_whitespace = pd.DataFrame(
            {
                "name": ["   "],  # Whitespace-only name
                "annotator_kind": ["HUMAN"],
                "span_id": ["id1"],
                "label": ["positive"],
            }
        )
        with pytest.raises(ValueError, match="name values must be non-empty strings"):
            _validate_span_annotations_dataframe(
                dataframe=df_whitespace, annotation_name_required=True
            )

        # None values in name column
        df_none = pd.DataFrame(
            {
                "name": [None],
                "annotator_kind": ["HUMAN"],
                "span_id": ["id1"],
                "label": ["positive"],
            }
        )
        with pytest.raises(ValueError, match="name values cannot be None"):
            _validate_span_annotations_dataframe(dataframe=df_none, annotation_name_required=True)


class TestAnnotationCreation:
    """Test suite for direct annotation creation functions."""

    def test_span_annotation_creation(self) -> None:
        """Test creating span annotations with different parameter combinations."""
        # Minimal required parameters
        basic_annotation = _create_span_annotation(
            span_id="span1", annotation_name="sentiment", label="positive"
        )
        assert basic_annotation["span_id"] == "span1"
        assert basic_annotation["name"] == "sentiment"
        assert basic_annotation["annotator_kind"] == "HUMAN"  # default
        assert basic_annotation.get("result", {}).get("label") == "positive"

        # Full parameters with all optional fields
        full_annotation = _create_span_annotation(
            span_id="span2",
            annotation_name="quality",
            annotator_kind="LLM",
            label="high_quality",
            score=0.95,
            explanation="Well structured response",
            metadata={"model": "gpt-4", "version": "1.0"},
            identifier="eval_run_1",
        )
        assert full_annotation["span_id"] == "span2"
        assert full_annotation["name"] == "quality"
        assert full_annotation["annotator_kind"] == "LLM"
        result = full_annotation.get("result", {})
        assert result.get("label") == "high_quality"
        assert result.get("score") == 0.95
        assert result.get("explanation") == "Well structured response"
        metadata = full_annotation.get("metadata", {})
        assert metadata.get("model") == "gpt-4"
        assert full_annotation.get("identifier") == "eval_run_1"

    def test_document_annotation_creation(self) -> None:
        """Test creating document annotations with position handling."""
        # Basic document annotation
        doc_annotation = _create_document_annotation(
            span_id="span1", document_position=0, annotation_name="relevance", label="relevant"
        )
        assert doc_annotation["span_id"] == "span1"
        assert doc_annotation["document_position"] == 0
        assert doc_annotation["name"] == "relevance"

        # Document annotation with all fields
        full_doc_annotation = _create_document_annotation(
            span_id="span2",
            document_position=2,
            annotation_name="accuracy",
            annotator_kind="CODE",
            label="accurate",
            score=0.88,
            explanation="Contains factual information",
            metadata={"source": "fact_checker"},
        )
        assert full_doc_annotation["document_position"] == 2
        result = full_doc_annotation.get("result", {})
        assert result.get("label") == "accurate"

    def test_identifier_edge_cases(self) -> None:
        """Test identifier handling edge cases."""
        # Empty string identifier should be stripped and not included
        annotation = _create_span_annotation(
            span_id="span1", annotation_name="test", identifier="", label="test"
        )
        assert annotation.get("identifier") is None

        # Whitespace-only identifier should be stripped and not included
        annotation = _create_span_annotation(
            span_id="span1", annotation_name="test", identifier="   ", label="test"
        )
        assert annotation.get("identifier") is None

        # Valid identifier with whitespace should be stripped but included
        annotation = _create_span_annotation(
            span_id="span1", annotation_name="test", identifier="  valid_id  ", label="test"
        )
        assert annotation.get("identifier") == "valid_id"


class TestPublicWrapperFunctions:
    """Test suite for the public wrapper functions users will call directly."""

    def test_trace_validation(self) -> None:
        """Test trace annotation validation for critical differences from span validation."""
        # Valid trace DataFrame
        df = pd.DataFrame(
            {
                "name": ["sentiment"],
                "annotator_kind": ["HUMAN"],
                "trace_id": ["trace1"],
                "label": ["positive"],
            }
        )
        _validate_trace_annotations_dataframe(dataframe=df)  # Should pass

        # Missing trace_id should fail
        df_missing = pd.DataFrame({"name": ["sentiment"], "label": ["positive"]})
        with pytest.raises(ValueError, match="DataFrame must have.*trace_id"):
            _validate_trace_annotations_dataframe(dataframe=df_missing)

    def test_public_chunking_functions(self) -> None:
        """Test public chunking functions work correctly."""
        # Test span chunking wrapper
        span_df = pd.DataFrame(
            {
                "name": ["test1", "test2"],
                "span_id": ["span1", "span2"],
                "label": ["pos", "neg"],
            }
        )
        span_chunks = list(
            _chunk_span_annotations_dataframe(dataframe=span_df, annotator_kind="LLM", chunk_size=1)
        )
        assert len(span_chunks) == 2
        assert span_chunks[0][0]["annotator_kind"] == "LLM"

        # Test document chunking wrapper
        doc_df = pd.DataFrame(
            {
                "name": ["relevance"],
                "span_id": ["span1"],
                "document_position": [0],
                "score": [0.9],
            }
        )
        doc_chunks = list(
            _chunk_document_annotations_dataframe(
                dataframe=doc_df, annotation_name="relevance_global", annotator_kind="CODE"
            )
        )
        assert len(doc_chunks) == 1
        # Global name should override DataFrame name
        assert doc_chunks[0][0]["name"] == "relevance_global"
        assert doc_chunks[0][0]["annotator_kind"] == "CODE"

    def test_whitespace_handling_consistency(self) -> None:
        """Test that all string fields consistently strip whitespace."""
        df = pd.DataFrame(
            {
                "name": ["  sentiment  "],  # Leading/trailing whitespace
                "span_id": ["span1"],
                "label": ["  positive  "],  # Leading/trailing whitespace
                "explanation": ["  good response  "],  # Leading/trailing whitespace
            }
        )
        chunks = list(_chunk_span_annotations_dataframe(dataframe=df, annotator_kind="HUMAN"))
        annotation = chunks[0][0]

        # New consistent behavior - all string fields are stripped
        # annotation_name/name: NOW stripped (removes whitespace)
        assert annotation["name"] == "sentiment"

        # label: NOW stripped (removes whitespace)
        result = annotation.get("result", {})
        assert result.get("label") == "positive"

        # explanation: IS stripped (removes whitespace)
        assert result.get("explanation") == "good response"

        # Test identifier stripping behavior
        annotation_with_id = _create_span_annotation(
            span_id="span1", annotation_name="test", identifier="  whitespace_id  ", label="test"
        )
        # identifier: IS stripped (removes whitespace)
        assert annotation_with_id.get("identifier") == "whitespace_id"

    def test_span_id_whitespace_handling(self) -> None:
        """Test that span_id values are NOW stripped (new consistent behavior)."""
        # Direct creation function - span_id now stripped
        annotation = _create_span_annotation(
            span_id="  span_with_whitespace  ", annotation_name="test", label="test"
        )
        # span_id: NOW stripped (removes whitespace)
        assert annotation["span_id"] == "span_with_whitespace"

        # DataFrame chunking - span_id extracted and stripped
        df = pd.DataFrame(
            {
                "name": ["test"],
                "span_id": ["  span_from_df  "],  # Leading/trailing whitespace
                "label": ["positive"],
            }
        )
        chunks = list(_chunk_span_annotations_dataframe(dataframe=df, annotator_kind="HUMAN"))
        annotation = chunks[0][0]
        # span_id: NOW stripped (removes whitespace from DataFrame)
        assert annotation["span_id"] == "span_from_df"

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.