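"""Tests for phoenix.metrics.binning: additive smoothing of bin counts,
categorical, interval, and quantile binning histograms, and segmented
metric summaries over binned data."""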
import math
from dataclasses import MISSING
from typing import Any
import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_almost_equal
from pandas.testing import assert_series_equal
from phoenix.core.model_schema import Column
from phoenix.metrics import Metric, binning
from phoenix.metrics.metrics import Count, CountNotNull, Max
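# AdditiveSmoothing should turn raw bin counts into a probability
# distribution, adding `pseudocount` to every bin before normalizing.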
def test_additive_smoothing() -> None:
np.random.seed(12345)
x_index, y_index = np.random.rand(7), np.random.rand(7)
counts = (
pd.Series([0, 1, 2, 3, 0, 0, 0], name="x", index=x_index, dtype=int),
pd.Series([0, 0, 0, 3, 2, 1, 0], name="y", index=y_index, dtype=int),
)
for i, (desired_result, actual_result) in enumerate(
zip(
(
pd.Series(
[0.0769, 0.1538, 0.2308, 0.3077, 0.0769, 0.0769, 0.0769],
name="x",
index=x_index,
dtype=float,
),
pd.Series(
[0.0769, 0.0769, 0.0769, 0.3077, 0.2308, 0.1538, 0.0769],
name="y",
index=y_index,
dtype=float,
),
),
map(binning.AdditiveSmoothing(pseudocount=1), counts),
)
):
assert_almost_equal(actual_result.sum(), 1, err_msg=f"i={i}")
assert_series_equal(actual_result.round(4), desired_result)
for i, (desired_result, actual_result) in enumerate(
zip(
(
pd.Series(
[0.0149, 0.1642, 0.3134, 0.4627, 0.0149, 0.0149, 0.0149],
name="x",
index=x_index,
dtype=float,
),
pd.Series(
[0.0149, 0.0149, 0.0149, 0.4627, 0.3134, 0.1642, 0.0149],
name="y",
index=y_index,
dtype=float,
),
),
map(binning.AdditiveSmoothing(pseudocount=0.1), counts),
)
):
assert_almost_equal(actual_result.sum(), 1, err_msg=f"i={i}")
assert_series_equal(actual_result.round(4), desired_result)
for i, (desired_result, actual_result) in enumerate(
zip(
(
pd.Series(
[0, 0.1667, 0.3333, 0.5, 0, 0, 0],
name="x",
index=x_index,
dtype=float,
),
pd.Series(
[0, 0, 0, 0.5, 0.3333, 0.1667, 0],
name="y",
index=y_index,
dtype=float,
),
),
map(binning.AdditiveSmoothing(pseudocount=0), counts),
)
):
assert_almost_equal(actual_result.sum(), 1, err_msg=f"i={i}")
assert_series_equal(actual_result.round(4), desired_result)
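# Series shared by the binning tests below: mixed integers, None, and an
# empty string, kept as dtype=object so each method must handle missing
# and non-numeric entries.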
data = pd.Series([-1, 0, 1, 2, 3, None, ""], dtype=object)
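# CategoricalBinning should reproduce value_counts(), including the NaN
# bucket unless dropna=True.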
def test_categorical_binning() -> None:
assert_series_equal(
binning.CategoricalBinning().histogram(data),
data.value_counts(dropna=False),
)
assert_series_equal(
binning.CategoricalBinning(dropna=True).histogram(data),
data.value_counts(),
)
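# IntervalBinning with explicit bins should match pd.cut + value_counts,
# with and without the NaN bucket.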
def test_interval_binning() -> None:
bins = pd.IntervalIndex(
(
pd.Interval(-np.inf, 1.0, closed="left"),
pd.Interval(1.0, 2.0, closed="left"),
pd.Interval(2.0, np.inf, closed="left"),
)
)
assert_series_equal(
binning.IntervalBinning(bins=bins).histogram(data),
pd.cut(data, bins).value_counts(dropna=False),
)
assert_series_equal(
binning.IntervalBinning(bins=bins, dropna=True).histogram(data),
pd.cut(data, bins).value_counts(),
)
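# QuantileBinning at the 25/50/75th percentiles should match cutting on the
# explicit intervals below (the quantiles of `data` are 0, 1, and 2).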
def test_quantile_binning() -> None:
prob = (0.25, 0.5, 0.75)
bins = pd.IntervalIndex(
(
pd.Interval(-np.inf, 0.0, closed="left"),
pd.Interval(0.0, 1.0, closed="left"),
pd.Interval(1.0, 2.0, closed="left"),
pd.Interval(2.0, np.inf, closed="left"),
)
)
assert_series_equal(
binning.QuantileBinning(probabilities=prob).histogram(data),
pd.cut(data, bins).value_counts(dropna=False),
)
assert_series_equal(
binning.QuantileBinning(probabilities=prob, dropna=True).histogram(data),
pd.cut(data, bins).value_counts(),
)
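# Bins derived from the reference series must be reused verbatim when
# histogramming new data: every row lands in one of the existing bins.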
def test_quantile_binning_reference_bins_adherence() -> None:
method = binning.QuantileBinning(
reference_series=data,
probabilities=(0.25, 0.5, 0.75),
dropna=True,
)
assert (bins := method.bins) is not None
new_data = pd.Series(range(2001)) - 1000
hist = method.histogram(new_data)
diff = hist.index.difference(bins)
assert hist.sum() == len(new_data)
assert diff.empty
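# A histogram of all-null data should be empty when dropna=True and fall
# entirely into the NaN bucket otherwise.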
@pytest.mark.parametrize("dropna", [(True,), (False,)])
def test_quantile_binning_dropna_adherence(dropna: bool) -> None:
method = binning.QuantileBinning(
reference_series=data,
probabilities=(0.25, 0.5, 0.75),
dropna=dropna,
)
new_data = pd.Series([None])
hist = method.histogram(new_data)
assert (method_bins := method.bins) is not None
diff = hist.index.difference(method_bins.tolist())
if dropna:
assert hist.sum() == 0
assert diff.empty
else:
assert hist.sum() == len(new_data)
assert diff.size == 1
assert math.isnan(diff[0])
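# segmented_summary with IntervalBinning: rows are grouped by cutting the
# "by" column into the given intervals and each metric is evaluated per
# segment; rows falling outside every interval count toward the NaN segment
# unless dropna=True.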
@pytest.mark.parametrize(
"metrics,desired_values,dropna",
[
((), [], False),
((), [], True),
((Count(),), [[12, 5, 1]], False),
((Count(),), [[5, 1]], True),
((CountNotNull(Column("x")),), [[6, 4, 0]], False),
((CountNotNull(Column("x")),), [[4, 0]], True),
((CountNotNull(Column()),), [[0] * 3], False),
((CountNotNull(Column()),), [[0] * 2], True),
(
(
Max(Column("x")),
Max(Column()),
Max(Column("x2")),
Max(Column("x")),
),
[
[7.0, 6.0, np.nan],
[np.nan] * 3,
[14.0, 12.0, np.nan],
[7.0, 6.0, np.nan],
],
False,
),
(
(
Max(Column("x2")),
Max(Column()),
Max(Column("x")),
Max(Column("x2")),
),
[
[12.0, np.nan],
[np.nan] * 2,
[6.0, np.nan],
[12.0, np.nan],
],
True,
),
],
)
def test_segmented_summary_with_interval_binning(
    metrics: tuple[Metric, ...],
desired_values: list[list[Any]],
dropna: bool,
) -> None:
df = pd.DataFrame(
[
[np.nan, np.nan],
[None, -1],
[pd.NA, -np.inf], # infinities are not null
[pd.NaT, np.nan],
[MISSING, np.nan], # MISSING is not null
[-4, 5],
[-3, np.nan],
[-2, 1],
[0.1, 0],
[1, 4],
[" 1 ", 6], # " 1 " is same as 1 due to numeric coercion
[1.1, np.nan],
[2, 2],
[" 2 ", 3],
["", np.nan], # "" is same as NaN due to numeric coercion
["nan", np.nan],
[np.inf, 7],
[-np.inf, np.nan],
],
columns=["by", "x"],
)
df["x2"] = df["x"] * 2
df["x3"] = df["x"] * 3 # should not be summarized
bins = pd.IntervalIndex(
(
pd.Interval(-2, 2, closed="left"),
pd.Interval(100, 200, closed="left"), # not found in data
pd.Interval(-np.inf, -200, closed="left"),
),
)
binning_method = binning.IntervalBinning(
bins=bins,
dropna=dropna,
)
actual = binning_method.segmented_summary(
Column("by"),
        df.sample(len(df)),  # shuffle rows so results do not depend on row order
metrics,
)
desired = pd.DataFrame(
dict(zip((m.id() for m in metrics), desired_values)),
).set_axis(
pd.CategoricalIndex(
([] if dropna else [np.nan])
+ [
pd.Interval(-2, 2, closed="left"),
pd.Interval(-np.inf, -200, closed="left"),
],
categories=bins,
ordered=True,
),
axis=0,
)
_compare_summaries(metrics, actual, desired)
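# segmented_summary with CategoricalBinning: rows are grouped by the raw
# values of the "by" column, with null-like values collapsed into a single
# NaN segment unless dropna=True.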
@pytest.mark.parametrize(
"metrics,desired_values,dropna",
[
((), [], False),
((), [], True),
((Count(),), [[4, 1, 2, 3, 1, 4, 1, 1]], False),
((Count(),), [[1, 2, 3, 1, 4, 1, 1]], True),
((CountNotNull(Column("x")),), [[4, 1, 2, 2, 0, 3, 0, 1]], False),
((CountNotNull(Column("x")),), [[1, 2, 2, 0, 3, 0, 1]], True),
((CountNotNull(Column()),), [[0] * 8], False),
((CountNotNull(Column()),), [[0] * 7], True),
(
(
Max(Column("x")),
Max(Column()),
Max(Column("x2")),
Max(Column("x")),
),
[
[-2, -1, 0, 1, np.nan, 2, np.nan, 3],
[np.nan] * 8,
[-4, -2, 0, 2, np.nan, 4, np.nan, 6],
[-2, -1, 0, 1, np.nan, 2, np.nan, 3],
],
False,
),
(
(
Max(Column("x2")),
Max(Column()),
Max(Column("x")),
Max(Column("x2")),
),
[
[-2, 0, 2, np.nan, 4, np.nan, 6],
[np.nan] * 7,
[-1, 0, 1, np.nan, 2, np.nan, 3],
[-2, 0, 2, np.nan, 4, np.nan, 6],
],
True,
),
],
)
def test_segmented_summary_with_categorical_binning(
    metrics: tuple[Metric, ...],
desired_values: list[list[Any]],
dropna: bool,
) -> None:
df = pd.DataFrame(
[
[np.nan, -2],
[pd.NA, -3],
[pd.NaT, -4],
[None, -5],
[MISSING, -1], # MISSING is not null
[0.1, 0],
[0.1, 0],
[1, 1],
[1, 1],
[1, np.nan],
["", np.nan],
["1", 2], # "1" differs from 1
["1", -np.inf], # infinities are not null
["1", 2],
["1", np.nan],
["nan", np.nan],
[-np.inf, 3],
],
columns=["by", "x"],
)
df["x2"] = df["x"] * 2
df["x3"] = df["x"] * 3 # should not be summarized
binning_method = binning.CategoricalBinning(
dropna=dropna,
)
actual = binning_method.segmented_summary(
Column("by"),
        df.sample(len(df)),  # shuffle rows so results do not depend on row order
metrics,
)
desired = pd.DataFrame(
dict(zip((m.id() for m in metrics), desired_values)),
).set_axis(
pd.CategoricalIndex(
([] if dropna else [np.nan]) + [MISSING, 0.1, 1, "", "1", "nan", -np.inf],
ordered=False,
),
axis=0,
)
_compare_summaries(metrics, actual, desired)
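# Helper: check that the actual and desired summaries have the same shape
# and agree on every metric value for every segment.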
def _compare_summaries(
    metrics: tuple[Metric, ...],
actual: pd.DataFrame,
desired: pd.DataFrame,
) -> None:
assert_almost_equal(len(actual), len(desired))
assert_almost_equal(actual.size, desired.size)
for idx in desired.index.union(actual.index):
results = []
for summary in (actual, desired):
try:
results.append(summary.loc[idx])
except KeyError:
results.append({})
for metric in metrics:
actual_value, desired_value = map(metric.get_value, results)
assert_almost_equal(
actual_value,
desired_value,
err_msg=f"{repr(idx)}:{repr(metric)}",
)