Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
test_model.py (6.08 kB)
import numpy as np
import pandas as pd
import pytest
from pandas import DataFrame, Timestamp
from phoenix.core.embedding_dimension import EmbeddingDimension
from phoenix.core.model import _get_embedding_dimensions
from phoenix.inferences.inferences import Inferences
from phoenix.inferences.schema import EmbeddingColumnNames, Schema


def _zero_vectors(count: int, size: int) -> list:
    """Return ``count`` separate all-zero NumPy vectors, each of length ``size``."""
    return [np.zeros(size) for _ in range(count)]


def _links(count: int) -> list:
    """Return the placeholder link strings ``some-link0`` .. ``some-link{count-1}``."""
    return [f"some-link{index}" for index in range(count)]


def _texts(count: int) -> list:
    """Return the placeholder text strings ``some-text0`` .. ``some-text{count-1}``."""
    return [f"some-text{index}" for index in range(count)]


def _two_embedding_schema() -> Schema:
    """Build the schema shared by both fixtures: a label, a timestamp and two embedding features."""
    feature_columns = {
        "embedding_feature0": EmbeddingColumnNames(
            vector_column_name="embedding_vector0",
            link_to_data_column_name="link_to_data0",
            raw_data_column_name="raw_data_column0",
        ),
        "embedding_feature1": EmbeddingColumnNames(
            vector_column_name="embedding_vector1",
            link_to_data_column_name="link_to_data1",
            raw_data_column_name="raw_data_column1",
        ),
    }
    return Schema(
        prediction_label_column_name="prediction_label",
        timestamp_column_name="timestamp",
        embedding_feature_column_names=feature_columns,
    )


def _expected_dimensions() -> list:
    """Return the embedding dimensions the passing tests expect, in order."""
    return [
        EmbeddingDimension(name="embedding_feature0"),
        EmbeddingDimension(name="embedding_feature1"),
    ]


@pytest.fixture
def inferences_with_large_embedding_vector() -> Inferences:
    """Three-row inferences whose two embedding columns hold vectors of length 7 and 8."""
    row_count = 3
    vector_size = 7
    columns = {
        "prediction_label": ["apple", "orange", "grape"],
        "timestamp": [
            Timestamp(year=2023, month=1, day=1, hour=2, second=30),
            Timestamp(year=2023, month=1, day=5, hour=4, second=25),
            Timestamp(year=2023, month=1, day=10, hour=6, second=20),
        ],
        "embedding_vector0": _zero_vectors(row_count, vector_size),
        "link_to_data0": _links(row_count),
        "raw_data_column0": _texts(row_count),
        # The second embedding column is deliberately one element longer.
        "embedding_vector1": _zero_vectors(row_count, vector_size + 1),
        "link_to_data1": _links(row_count),
        "raw_data_column1": _texts(row_count),
    }
    return Inferences(dataframe=DataFrame(columns), schema=_two_embedding_schema())


@pytest.fixture
def inferences_with_embedding_vector() -> Inferences:
    """Three-row inferences whose two embedding columns both hold vectors of length 5."""
    row_count = 3
    vector_size = 5
    columns = {
        "prediction_label": ["apple", "orange", "grape"],
        "prediction_id": list(map(str, range(row_count))),
        "timestamp": [pd.Timestamp.now() for _ in range(row_count)],
        "embedding_vector0": _zero_vectors(row_count, vector_size),
        "link_to_data0": _links(row_count),
        "raw_data_column0": _texts(row_count),
        "embedding_vector1": _zero_vectors(row_count, vector_size),
        "link_to_data1": _links(row_count),
        "raw_data_column1": _texts(row_count),
    }
    return Inferences(dataframe=DataFrame(columns), schema=_two_embedding_schema())


def test_invalid_model_embeddings_primary_and_ref_embedding_size_mismatch(
    inferences_with_embedding_vector: Inferences,
    inferences_with_large_embedding_vector: Inferences,
) -> None:
    """Pairing inferences whose embedding vector lengths differ must raise ``ValueError``."""
    with pytest.raises(ValueError):
        _get_embedding_dimensions(
            inferences_with_embedding_vector,
            inferences_with_large_embedding_vector,
        )


def test_valid_model_embeddings(inferences_with_embedding_vector: Inferences) -> None:
    """Pairing an inferences object with itself yields both embedding dimensions."""
    dimensions = _get_embedding_dimensions(
        inferences_with_embedding_vector,
        inferences_with_embedding_vector,
    )

    assert len(dimensions) == 2
    assert dimensions == _expected_dimensions()


def test_valid_model_embeddings_one_inferences_missing_embeddings_feature(
    inferences_with_embedding_vector: Inferences,
) -> None:
    """Both dimensions are still reported when the second inferences has no embedding features."""
    row_count = 3
    # No embedding columns and no embedding entries in the schema.
    bare_frame = DataFrame(
        {
            "prediction_label": ["apple", "orange", "grape"],
            "prediction_id": list(map(str, range(row_count))),
            "timestamp": [pd.Timestamp.now() for _ in range(row_count)],
        }
    )
    bare_schema = Schema(
        prediction_label_column_name="prediction_label",
        timestamp_column_name="timestamp",
    )
    inferences_without_embeddings = Inferences(dataframe=bare_frame, schema=bare_schema)

    dimensions = _get_embedding_dimensions(
        inferences_with_embedding_vector,
        inferences_without_embeddings,
    )

    assert len(dimensions) == 2
    assert dimensions == _expected_dimensions()


def test_valid_model_with_nan_embeddings(inferences_with_embedding_vector: Inferences) -> None:
    """Both dimensions are still reported when one embedding column is entirely NaN."""
    # Overwrite every vector in the first embedding column with NaN.
    inferences_with_embedding_vector.dataframe["embedding_vector0"] = np.nan

    dimensions = _get_embedding_dimensions(
        inferences_with_embedding_vector,
        inferences_with_embedding_vector,
    )

    assert len(dimensions) == 2
    assert dimensions == _expected_dimensions()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.