"""Unit tests for the RelevanceScorer quick-win improvements."""
from ckan_mcp.helpers import RelevanceScorer
from ckan_mcp.types import (
CkanOrganization,
CkanPackage,
CkanResource,
CkanTag,
CkanToolsConfig,
RelevanceWeights,
)
def _build_config() -> CkanToolsConfig:
    """Return the CkanToolsConfig shared by the tests in this module.

    The config uses ``https://example.com`` as its CKAN base URL and a
    ``RelevanceWeights`` instance constructed with no arguments.
    """
    default_weights = RelevanceWeights()
    return CkanToolsConfig(
        ckan_base_url="https://example.com",
        relevance_weights=default_weights,
    )
def test_relevance_scorer_handles_multi_term_queries() -> None:
    """Check that a three-word query scores the sum of all five field weights.

    The dataset is built so the words of ``"toronto traffic data"`` appear in
    its title, notes, tags, organization and resource name.
    """
    cfg = _build_config()
    relevance_scorer = RelevanceScorer(cfg)

    # Build the nested pieces first so the package construction stays flat.
    tags = [
        CkanTag(id="traffic", name="Traffic"),
        CkanTag(id="data", name="Data"),
    ]
    publisher = CkanOrganization(
        id="city-of-toronto",
        name="city-of-toronto",
        title="City of Toronto",
    )
    csv_resource = CkanResource(
        id="res1",
        name="traffic_data_csv",
        format="CSV",
        url="http://example.com/traffic.csv",
    )
    package = CkanPackage(
        id="traffic-data",
        title="Toronto Traffic Data Collection",
        notes="Real-time traffic data from Toronto streets including vehicle counts",
        tags=tags,
        organization=publisher,
        resources=[csv_resource],
    )

    actual = relevance_scorer.score(package, "toronto traffic data")

    # Each major field is expected to contribute its weight once the
    # multi-term query has been tokenized. The addends are kept in this
    # exact order so the total is identical even for float weights.
    weights = cfg.relevance_weights
    expected = (
        weights.title
        + weights.description
        + weights.tags
        + weights.organization
        + weights.resource
    )
    assert actual == expected
def test_relevance_scorer_ignores_stop_words_and_word_fragments() -> None:
    """Check that stop-word-only and partial-word queries both score zero."""
    cfg = _build_config()
    relevance_scorer = RelevanceScorer(cfg)

    publisher = CkanOrganization(
        id="city-of-toronto",
        name="city-of-toronto",
        title="City of Toronto",
    )
    package = CkanPackage(
        id="traffic-data",
        title="Toronto Traffic Data",
        notes="Traffic data for testing",
        tags=[CkanTag(id="traffic", name="Traffic")],
        organization=publisher,
        resources=[],
    )

    # A query made up solely of stop words / short fragments is expected to
    # be discarded, leaving nothing to match against.
    stop_word_score = relevance_scorer.score(package, "at the of")
    assert stop_word_score == 0

    # "raf" only occurs inside "Traffic"; word-boundary matching should keep
    # such a fragment from counting as a hit.
    fragment_score = relevance_scorer.score(package, "raf")
    assert fragment_score == 0
def test_relevance_scorer_counts_multiple_matching_resources() -> None:
    """Check that three resources matching the query yield a score of 3.

    The dataset has no title, notes, tags or organization set here, only
    three resources whose names all start with ``traffic_``.
    """
    cfg = _build_config()
    relevance_scorer = RelevanceScorer(cfg)

    # (id, name, format, url) for each resource attached to the dataset.
    resource_specs = (
        ("r1", "traffic_counts", "CSV", "http://ex/r1"),
        ("r2", "traffic_incidents", "JSON", "http://ex/r2"),
        ("r3", "traffic_forecasts", "ZIP", "http://ex/r3"),
    )
    matching_resources = [
        CkanResource(id=res_id, name=res_name, format=res_format, url=res_url)
        for res_id, res_name, res_format, res_url in resource_specs
    ]
    package = CkanPackage(id="multi-resource", resources=matching_resources)

    actual = relevance_scorer.score(package, "traffic")

    # NOTE(review): the expectation is a literal 3 rather than a value derived
    # from cfg.relevance_weights — confirm it tracks the default resource weight.
    assert actual == 3