xai-toolkit

xai-mcp
tests

test_narrators.py•10.4 KiB

"""Tests for narrators.py — verify narrative correctness. All tests use hand-crafted data to keep them fast and deterministic. No SHAP computation, no model loading — pure narrator logic. """ from xai_toolkit.narrators import ( narrate_dataset, narrate_feature_comparison, narrate_model_summary, narrate_partial_dependence, narrate_prediction, ) from xai_toolkit.schemas import ( DatasetDescription, FeatureImportance, ModelSummary, PartialDependenceResult, ShapResult, ) # --- Fixtures (hand-crafted data) --- def _make_shap_result() -> ShapResult: """Create a known ShapResult for deterministic testing.""" return ShapResult( prediction=1, prediction_label="malignant", probability=0.91, base_value=0.42, shap_values={ "worst_radius": 0.28, "worst_concave_points": 0.19, "mean_concavity": 0.14, "mean_smoothness": -0.06, "mean_texture": 0.03, }, feature_values={ "worst_radius": 23.4, "worst_concave_points": 0.18, "mean_concavity": 0.15, "mean_smoothness": 0.08, "mean_texture": 19.2, }, feature_names=[ "worst_radius", "worst_concave_points", "mean_concavity", "mean_smoothness", "mean_texture", ], ) def _make_model_summary() -> ModelSummary: return ModelSummary( model_type="XGBClassifier", accuracy=0.956, n_features=30, n_train_samples=455, n_test_samples=114, target_names=["malignant", "benign"], top_features=[ FeatureImportance(name="worst_radius", importance=0.15, direction="positive", mean_shap=0.12), FeatureImportance(name="worst_concave_points", importance=0.10, direction="positive", mean_shap=0.08), FeatureImportance(name="mean_concavity", importance=0.07, direction="positive", mean_shap=0.05), ], ) def _make_feature_importances() -> list[FeatureImportance]: return [ FeatureImportance(name="worst_radius", importance=0.15, direction="positive", mean_shap=0.12), FeatureImportance(name="worst_concave_points", importance=0.10, direction="positive", mean_shap=0.08), FeatureImportance(name="mean_concavity", importance=0.07, direction="positive", mean_shap=0.05), FeatureImportance(name="mean_smoothness", importance=0.04, direction="negative", mean_shap=-0.03), FeatureImportance(name="mean_texture", importance=0.02, direction="positive", mean_shap=0.01), ] def _make_pdp_result() -> PartialDependenceResult: return PartialDependenceResult( feature_name="mean radius", feature_values=[6.98, 10.0, 15.0, 20.0, 28.11], predictions=[0.12, 0.25, 0.55, 0.78, 0.89], ice_curves=[], # ICE curves not needed for narrator tests feature_min=6.98, feature_max=28.11, prediction_min=0.12, prediction_max=0.89, ) def _make_dataset_description() -> DatasetDescription: return DatasetDescription( n_samples=114, n_features=30, feature_names=["worst_radius", "mean_texture"], class_distribution={"malignant": 43, "benign": 71}, missing_values=0, feature_stats={ "worst_radius": {"mean": 16.27, "std": 4.83, "min": 7.93, "max": 36.04}, "mean_texture": {"mean": 19.29, "std": 4.30, "min": 9.71, "max": 39.28}, }, ) # --- narrate_prediction tests (Day 1, kept for regression) --- class TestNarratePrediction: """Tests aligned with Day 1 acceptance scenario D1-S2.""" def test_narrative_is_string(self): result = _make_shap_result() narrative = narrate_prediction(result) assert isinstance(narrative, str) def test_narrative_has_minimum_length(self): result = _make_shap_result() narrative = narrate_prediction(result) assert len(narrative) > 50, f"Narrative too short ({len(narrative)} chars)" def test_narrative_contains_classification(self): result = _make_shap_result() narrative = narrate_prediction(result) assert "classified this sample as" in narrative def test_narrative_contains_probability(self): result = _make_shap_result() narrative = narrate_prediction(result) assert "probability" in narrative def test_narrative_mentions_top_3_features(self): result = _make_shap_result() narrative = narrate_prediction(result, top_n=3) assert "worst_radius" in narrative assert "worst_concave_points" in narrative assert "mean_concavity" in narrative def test_each_feature_has_direction(self): result = _make_shap_result() narrative = narrate_prediction(result, top_n=3) assert "pushing toward" in narrative or "pushing away from" in narrative def test_each_feature_has_magnitude(self): result = _make_shap_result() narrative = narrate_prediction(result, top_n=3) assert "0.28" in narrative def test_narrative_is_deterministic(self): result = _make_shap_result() narrative_1 = narrate_prediction(result) narrative_2 = narrate_prediction(result) assert narrative_1 == narrative_2 def test_opposing_factor_mentioned(self): result = _make_shap_result() narrative = narrate_prediction(result, top_n=3) assert "opposing" in narrative.lower() assert "mean_smoothness" in narrative # --- narrate_model_summary tests (Day 2, D2-S1) --- class TestNarrateModelSummary: """Tests aligned with scenario D2-S1.""" def test_contains_model_type(self): narrative = narrate_model_summary(_make_model_summary()) assert "XGBClassifier" in narrative def test_contains_accuracy(self): narrative = narrate_model_summary(_make_model_summary()) assert "95.6%" in narrative def test_contains_feature_count(self): narrative = narrate_model_summary(_make_model_summary()) assert "30 features" in narrative def test_contains_top_features(self): narrative = narrate_model_summary(_make_model_summary()) assert "worst_radius" in narrative assert "worst_concave_points" in narrative assert "mean_concavity" in narrative def test_contains_target_names(self): narrative = narrate_model_summary(_make_model_summary()) assert "malignant" in narrative assert "benign" in narrative def test_is_deterministic(self): n1 = narrate_model_summary(_make_model_summary()) n2 = narrate_model_summary(_make_model_summary()) assert n1 == n2 # --- narrate_feature_comparison tests (Day 2, D2-S2) --- class TestNarrateFeatureComparison: """Tests aligned with scenario D2-S2.""" def test_contains_ranked_features(self): narrative = narrate_feature_comparison(_make_feature_importances()) assert "worst_radius" in narrative assert "#1" in narrative def test_features_appear_in_order(self): narrative = narrate_feature_comparison(_make_feature_importances()) pos_radius = narrative.index("worst_radius") pos_concave = narrative.index("worst_concave_points") assert pos_radius < pos_concave, "Features should appear in importance order" def test_contains_magnitude(self): narrative = narrate_feature_comparison(_make_feature_importances()) assert "0.15" in narrative # worst_radius importance def test_contains_direction(self): narrative = narrate_feature_comparison(_make_feature_importances()) assert "increase risk" in narrative or "decrease risk" in narrative def test_uses_comparative_language(self): narrative = narrate_feature_comparison(_make_feature_importances()) # The first feature is 1.5x more important → should mention this assert "more influential" in narrative or "Ranked" in narrative def test_is_deterministic(self): n1 = narrate_feature_comparison(_make_feature_importances()) n2 = narrate_feature_comparison(_make_feature_importances()) assert n1 == n2 # --- narrate_partial_dependence tests (Day 2, D2-S3) --- class TestNarratePartialDependence: """Tests aligned with scenario D2-S3.""" def test_describes_relationship(self): narrative = narrate_partial_dependence(_make_pdp_result()) assert "mean radius" in narrative assert "increases" in narrative or "decreases" in narrative def test_contains_feature_range(self): narrative = narrate_partial_dependence(_make_pdp_result()) assert "6.98" in narrative assert "28.11" in narrative def test_contains_prediction_range(self): narrative = narrate_partial_dependence(_make_pdp_result()) assert "12.0%" in narrative or "12%" in narrative assert "89.0%" in narrative or "89%" in narrative def test_mentions_steepest_change(self): narrative = narrate_partial_dependence(_make_pdp_result()) assert "steepest" in narrative def test_is_deterministic(self): n1 = narrate_partial_dependence(_make_pdp_result()) n2 = narrate_partial_dependence(_make_pdp_result()) assert n1 == n2 # --- narrate_dataset tests (Day 2, D2-S4) --- class TestNarrateDataset: """Tests aligned with scenario D2-S4.""" def test_contains_sample_count(self): narrative = narrate_dataset(_make_dataset_description()) assert "114 samples" in narrative def test_contains_feature_count(self): narrative = narrate_dataset(_make_dataset_description()) assert "30 features" in narrative def test_contains_class_distribution(self): narrative = narrate_dataset(_make_dataset_description()) assert "malignant" in narrative assert "benign" in narrative assert "43" in narrative assert "71" in narrative def test_mentions_missing_values(self): narrative = narrate_dataset(_make_dataset_description()) assert "missing" in narrative.lower() def test_zero_missing_says_no_missing(self): narrative = narrate_dataset(_make_dataset_description()) assert "no missing" in narrative.lower() def test_is_deterministic(self): n1 = narrate_dataset(_make_dataset_description()) n2 = narrate_dataset(_make_dataset_description()) assert n1 == n2

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/florenciakabas/xai-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_narrators.py•10.4 KiB