All information about MCP servers is available through our MCP API, for example:
curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
import math
from dataclasses import MISSING
from typing import Any
import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_almost_equal
from pandas.testing import assert_series_equal
from phoenix.core.model_schema import Column
from phoenix.metrics import Metric, binning
from phoenix.metrics.metrics import Count, CountNotNull, Max
def test_additive_smoothing() -> None:
    """Check ``binning.AdditiveSmoothing`` for pseudocounts 1, 0.1 and 0.

    For every pseudocount, each smoothed series must sum to 1 and, once
    rounded to four decimals, equal the hard-coded expectation (same name
    and index as the input counts).
    """
    np.random.seed(12345)
    x_index, y_index = np.random.rand(7), np.random.rand(7)
    counts = (
        pd.Series([0, 1, 2, 3, 0, 0, 0], name="x", index=x_index, dtype=int),
        pd.Series([0, 0, 0, 3, 2, 1, 0], name="y", index=y_index, dtype=int),
    )
    labelled_indexes = (("x", x_index), ("y", y_index))
    # (pseudocount, expected rounded values for the "x" and "y" series)
    cases = (
        (
            1,
            (
                [0.0769, 0.1538, 0.2308, 0.3077, 0.0769, 0.0769, 0.0769],
                [0.0769, 0.0769, 0.0769, 0.3077, 0.2308, 0.1538, 0.0769],
            ),
        ),
        (
            0.1,
            (
                [0.0149, 0.1642, 0.3134, 0.4627, 0.0149, 0.0149, 0.0149],
                [0.0149, 0.0149, 0.0149, 0.4627, 0.3134, 0.1642, 0.0149],
            ),
        ),
        (
            0,
            (
                [0, 0.1667, 0.3333, 0.5, 0, 0, 0],
                [0, 0, 0, 0.5, 0.3333, 0.1667, 0],
            ),
        ),
    )
    for pseudocount, expected_values in cases:
        smooth = binning.AdditiveSmoothing(pseudocount=pseudocount)
        desired_results = tuple(
            pd.Series(values, name=name, index=index, dtype=float)
            for (name, index), values in zip(labelled_indexes, expected_values)
        )
        for i, (desired_result, raw_counts) in enumerate(zip(desired_results, counts)):
            actual_result = smooth(raw_counts)
            assert_almost_equal(actual_result.sum(), 1, err_msg=f"i={i}")
            assert_series_equal(actual_result.round(4), desired_result)
# Shared fixture for the histogram tests: an object-dtype series mixing
# integers with a None and an empty string.
data = pd.Series(
    [-1, 0, 1, 2, 3, None, ""],
    dtype=object,
)
def test_categorical_binning() -> None:
    """``CategoricalBinning.histogram`` must agree with ``Series.value_counts``.

    The default binning is compared with ``value_counts(dropna=False)``;
    ``dropna=True`` is compared with the default ``value_counts()``.
    """
    hist_default = binning.CategoricalBinning().histogram(data)
    assert_series_equal(hist_default, data.value_counts(dropna=False))

    hist_dropna = binning.CategoricalBinning(dropna=True).histogram(data)
    assert_series_equal(hist_dropna, data.value_counts())
def test_interval_binning() -> None:
    """``IntervalBinning.histogram`` must agree with ``pd.cut(...).value_counts``.

    Uses three left-closed bins: ``[-inf, 1)``, ``[1, 2)`` and ``[2, inf)``.
    """
    edges = (-np.inf, 1.0, 2.0, np.inf)
    bins = pd.IntervalIndex(
        tuple(
            pd.Interval(lower, upper, closed="left")
            for lower, upper in zip(edges, edges[1:])
        )
    )

    hist_default = binning.IntervalBinning(bins=bins).histogram(data)
    assert_series_equal(
        hist_default,
        pd.cut(data, bins).value_counts(dropna=False),
    )

    hist_dropna = binning.IntervalBinning(bins=bins, dropna=True).histogram(data)
    assert_series_equal(
        hist_dropna,
        pd.cut(data, bins).value_counts(),
    )
def test_quantile_binning() -> None:
    """``QuantileBinning.histogram`` must agree with ``pd.cut`` on fixed bins.

    With probabilities (0.25, 0.5, 0.75) the expected bins are the
    left-closed intervals ``[-inf, 0)``, ``[0, 1)``, ``[1, 2)`` and
    ``[2, inf)``.
    """
    prob = (0.25, 0.5, 0.75)
    edges = (-np.inf, 0.0, 1.0, 2.0, np.inf)
    bins = pd.IntervalIndex(
        tuple(
            pd.Interval(lower, upper, closed="left")
            for lower, upper in zip(edges, edges[1:])
        )
    )

    hist_default = binning.QuantileBinning(probabilities=prob).histogram(data)
    assert_series_equal(
        hist_default,
        pd.cut(data, bins).value_counts(dropna=False),
    )

    hist_dropna = binning.QuantileBinning(probabilities=prob, dropna=True).histogram(data)
    assert_series_equal(
        hist_dropna,
        pd.cut(data, bins).value_counts(),
    )
def test_quantile_binning_reference_bins_adherence() -> None:
    """Histograms of new data must only use bins derived from the reference.

    The binning is built from ``data`` as the reference series, then applied
    to the integers -1000..1000: every value must be counted and no index
    label may fall outside ``method.bins``.
    """
    method = binning.QuantileBinning(
        reference_series=data,
        probabilities=(0.25, 0.5, 0.75),
        dropna=True,
    )
    reference_bins = method.bins
    assert reference_bins is not None

    new_data = pd.Series(range(2001)) - 1000
    hist = method.histogram(new_data)
    unexpected_labels = hist.index.difference(reference_bins)

    assert hist.sum() == len(new_data)
    assert unexpected_labels.empty
# With a single argname, pytest passes each list element through as the value
# itself, so the values must be plain booleans. The 1-tuples used previously
# ``(True,)`` / ``(False,)`` are both truthy, which meant the ``dropna=False``
# branch below was never exercised.
@pytest.mark.parametrize("dropna", [True, False])
def test_quantile_binning_dropna_adherence(dropna: bool) -> None:
    """Check how an all-null series is binned under both ``dropna`` settings.

    With ``dropna=True`` the histogram must sum to zero and contain no index
    label outside ``method.bins``. With ``dropna=False`` the single null must
    be counted and appear as exactly one extra NaN label.
    """
    method = binning.QuantileBinning(
        reference_series=data,
        probabilities=(0.25, 0.5, 0.75),
        dropna=dropna,
    )
    new_data = pd.Series([None])
    hist = method.histogram(new_data)
    assert (method_bins := method.bins) is not None
    # Labels present in the histogram that are not one of the reference bins.
    diff = hist.index.difference(method_bins.tolist())
    if dropna:
        assert hist.sum() == 0
        assert diff.empty
    else:
        assert hist.sum() == len(new_data)
        assert diff.size == 1
        assert math.isnan(diff[0])
@pytest.mark.parametrize(
    "metrics,desired_values,dropna",
    [
        ((), [], False),
        ((), [], True),
        ((Count(),), [[12, 5, 1]], False),
        ((Count(),), [[5, 1]], True),
        ((CountNotNull(Column("x")),), [[6, 4, 0]], False),
        ((CountNotNull(Column("x")),), [[4, 0]], True),
        ((CountNotNull(Column()),), [[0] * 3], False),
        ((CountNotNull(Column()),), [[0] * 2], True),
        (
            (
                Max(Column("x")),
                Max(Column()),
                Max(Column("x2")),
                Max(Column("x")),
            ),
            [
                [7.0, 6.0, np.nan],
                [np.nan] * 3,
                [14.0, 12.0, np.nan],
                [7.0, 6.0, np.nan],
            ],
            False,
        ),
        (
            (
                Max(Column("x2")),
                Max(Column()),
                Max(Column("x")),
                Max(Column("x2")),
            ),
            [
                [12.0, np.nan],
                [np.nan] * 2,
                [6.0, np.nan],
                [12.0, np.nan],
            ],
            True,
        ),
    ],
)
def test_segmented_summary_with_interval_binning(
    metrics: tuple[Metric],
    desired_values: list[list[Any]],
    dropna: bool,
) -> None:
    """Compare ``IntervalBinning.segmented_summary`` with hand-written results.

    The frame is segmented by its "by" column after its rows are re-sampled
    with ``df.sample(len(df))``. The expected summary is indexed by the bins
    that occur in the data, preceded by a NaN label when ``dropna`` is false.
    """
    rows = [
        [np.nan, np.nan],
        [None, -1],
        [pd.NA, -np.inf],  # infinities are not null
        [pd.NaT, np.nan],
        [MISSING, np.nan],  # MISSING is not null
        [-4, 5],
        [-3, np.nan],
        [-2, 1],
        [0.1, 0],
        [1, 4],
        [" 1 ", 6],  # " 1 " is same as 1 due to numeric coercion
        [1.1, np.nan],
        [2, 2],
        [" 2 ", 3],
        ["", np.nan],  # "" is same as NaN due to numeric coercion
        ["nan", np.nan],
        [np.inf, 7],
        [-np.inf, np.nan],
    ]
    df = pd.DataFrame(rows, columns=["by", "x"])
    df["x2"] = df["x"] * 2
    df["x3"] = df["x"] * 3  # should not be summarized

    middle_bin = pd.Interval(-2, 2, closed="left")
    absent_bin = pd.Interval(100, 200, closed="left")  # not found in data
    lowest_bin = pd.Interval(-np.inf, -200, closed="left")
    bins = pd.IntervalIndex((middle_bin, absent_bin, lowest_bin))

    binning_method = binning.IntervalBinning(bins=bins, dropna=dropna)
    shuffled = df.sample(len(df))
    actual = binning_method.segmented_summary(Column("by"), shuffled, metrics)

    expected_labels = [middle_bin, lowest_bin]
    if not dropna:
        expected_labels = [np.nan] + expected_labels
    expected_columns = {
        metric.id(): values for metric, values in zip(metrics, desired_values)
    }
    desired = pd.DataFrame(expected_columns).set_axis(
        pd.CategoricalIndex(expected_labels, categories=bins, ordered=True),
        axis=0,
    )
    _compare_summaries(metrics, actual, desired)
@pytest.mark.parametrize(
    "metrics,desired_values,dropna",
    [
        ((), [], False),
        ((), [], True),
        ((Count(),), [[4, 1, 2, 3, 1, 4, 1, 1]], False),
        ((Count(),), [[1, 2, 3, 1, 4, 1, 1]], True),
        ((CountNotNull(Column("x")),), [[4, 1, 2, 2, 0, 3, 0, 1]], False),
        ((CountNotNull(Column("x")),), [[1, 2, 2, 0, 3, 0, 1]], True),
        ((CountNotNull(Column()),), [[0] * 8], False),
        ((CountNotNull(Column()),), [[0] * 7], True),
        (
            (
                Max(Column("x")),
                Max(Column()),
                Max(Column("x2")),
                Max(Column("x")),
            ),
            [
                [-2, -1, 0, 1, np.nan, 2, np.nan, 3],
                [np.nan] * 8,
                [-4, -2, 0, 2, np.nan, 4, np.nan, 6],
                [-2, -1, 0, 1, np.nan, 2, np.nan, 3],
            ],
            False,
        ),
        (
            (
                Max(Column("x2")),
                Max(Column()),
                Max(Column("x")),
                Max(Column("x2")),
            ),
            [
                [-2, 0, 2, np.nan, 4, np.nan, 6],
                [np.nan] * 7,
                [-1, 0, 1, np.nan, 2, np.nan, 3],
                [-2, 0, 2, np.nan, 4, np.nan, 6],
            ],
            True,
        ),
    ],
)
def test_segmented_summary_with_categorical_binning(
    metrics: tuple[Metric],
    desired_values: list[list[Any]],
    dropna: bool,
) -> None:
    """Compare ``CategoricalBinning.segmented_summary`` with hand-written results.

    The frame is segmented by its "by" column after its rows are re-sampled
    with ``df.sample(len(df))``. The expected summary is indexed by the
    distinct "by" values, preceded by a NaN label when ``dropna`` is false.
    """
    rows = [
        [np.nan, -2],
        [pd.NA, -3],
        [pd.NaT, -4],
        [None, -5],
        [MISSING, -1],  # MISSING is not null
        [0.1, 0],
        [0.1, 0],
        [1, 1],
        [1, 1],
        [1, np.nan],
        ["", np.nan],
        ["1", 2],  # "1" differs from 1
        ["1", -np.inf],  # infinities are not null
        ["1", 2],
        ["1", np.nan],
        ["nan", np.nan],
        [-np.inf, 3],
    ]
    df = pd.DataFrame(rows, columns=["by", "x"])
    df["x2"] = df["x"] * 2
    df["x3"] = df["x"] * 3  # should not be summarized

    binning_method = binning.CategoricalBinning(dropna=dropna)
    shuffled = df.sample(len(df))
    actual = binning_method.segmented_summary(Column("by"), shuffled, metrics)

    expected_labels = [MISSING, 0.1, 1, "", "1", "nan", -np.inf]
    if not dropna:
        expected_labels = [np.nan] + expected_labels
    expected_columns = {
        metric.id(): values for metric, values in zip(metrics, desired_values)
    }
    desired = pd.DataFrame(expected_columns).set_axis(
        pd.CategoricalIndex(expected_labels, ordered=False),
        axis=0,
    )
    _compare_summaries(metrics, actual, desired)
def _compare_summaries(
    metrics: tuple[Metric],
    actual: pd.DataFrame,
    desired: pd.DataFrame,
) -> None:
    """Assert that two summary frames agree in shape and in every metric value.

    Row count and total size are compared first. Then, for each label in the
    union of both indexes, the row is looked up in ``actual`` and ``desired``
    (an empty dict stands in when ``.loc`` raises ``KeyError``) and each
    metric's ``get_value`` result is compared with ``assert_almost_equal``.
    """
    assert_almost_equal(len(actual), len(desired))
    assert_almost_equal(actual.size, desired.size)

    def row_or_empty(summary: pd.DataFrame, label: Any) -> Any:
        # A label missing from one frame yields an empty mapping, so the
        # metric comparison below still runs for that label.
        try:
            return summary.loc[label]
        except KeyError:
            return {}

    for label in desired.index.union(actual.index):
        actual_row = row_or_empty(actual, label)
        desired_row = row_or_empty(desired, label)
        for metric in metrics:
            actual_value = metric.get_value(actual_row)
            desired_value = metric.get_value(desired_row)
            assert_almost_equal(
                actual_value,
                desired_value,
                err_msg=f"{repr(label)}:{repr(metric)}",
            )