"""Tests for mcp_server.links.link_compound_to_nordic_plants."""
import sys
from pathlib import Path

import pandas as pd

# Make the repository root importable before loading mcp_server modules.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from mcp_server.datasets import DatasetLoadError, LoadedDataset  # noqa: E402
from mcp_server.links import link_compound_to_nordic_plants  # noqa: E402


def test_link_compound_to_nordic_plants_merges_records(monkeypatch):
    compound = "Arctigenin"
    # COCONUT stores lowercase compound names with pipe-separated organism lists.
    coconut_df = pd.DataFrame(
        {
            "name": ["arctigenin"],
            "organisms": ["Arctium lappa|Rosa canina"],
        }
    )
    laji_df = pd.DataFrame(
        {
            "Scientific name": ["Arctium lappa", "Rosa canina"],
            "Observation count from Finland": [1200, 900],
            "Identifier": ["MX.123", "MX.456"],
        }
    )
    # GBIF canonical names are lowercase here, so the link step must match case-insensitively.
    gbif_df = pd.DataFrame(
        {
            "canonicalName": ["arctium lappa", "rosa canina"],
            "speciesKey": [12345, 67890],
            "obs_FI": [1500, 1100],
            "obs_NO": [200, 150],
            "count_FI_60N": [400, 300],
            "count_NO_60N": [50, 40],
            "count_FI_66N": [20, 10],
            "count_NO_66N": [5, 3],
        }
    )
    dataset_map = {
        "coconut_csv-09-2025_FI_NO_plants.csv": coconut_df,
        "laji2_fi.txt": laji_df,
        "gbif_plants_FI_NO_merged.tsv": gbif_df,
    }

    def fake_load_dataset(name: str):
        if name not in dataset_map:
            raise DatasetLoadError(f"missing dataset {name}")
        return LoadedDataset(
            name=name,
            frame=dataset_map[name].copy(),
            source_path=Path(name),
            metadata={},
        )

    monkeypatch.setattr("mcp_server.links.load_dataset", fake_load_dataset)

    result = link_compound_to_nordic_plants(compound)

    assert result["compound"].lower() == "arctigenin"
    assert sorted(result["organisms"]) == ["Arctium lappa", "Rosa canina"]
    rows = {row["organism"]: row for row in result["table"]}
    assert rows["Arctium lappa"]["obs_FI_laji"] == 1200
    assert rows["Arctium lappa"]["obs_FI_gbif"] == 1500
    assert rows["Arctium lappa"]["url_laji"].endswith("MX.123/occurrence")
    assert rows["Arctium lappa"]["url_gbif"].endswith("/12345")
    assert rows["Rosa canina"]["obs_NO_gbif"] == 150
    assert result["missing_in_laji"] == []
    assert result["missing_in_gbif"] == []


def test_link_compound_to_nordic_plants_missing_compound(monkeypatch):
    coconut_df = pd.DataFrame({"name": ["arctigenin"], "organisms": ["Arctium lappa"]})
    # Empty Laji.fi and GBIF frames that keep the expected column layout.
    dataset_map = {
        "coconut_csv-09-2025_FI_NO_plants.csv": coconut_df,
        "laji2_fi.txt": pd.DataFrame(
            columns=["Scientific name", "Observation count from Finland", "Identifier"]
        ),
        "gbif_plants_FI_NO_merged.tsv": pd.DataFrame(
            columns=[
                "canonicalName", "speciesKey", "obs_FI", "obs_NO",
                "count_FI_60N", "count_NO_60N", "count_FI_66N", "count_NO_66N",
            ]
        ),
    }

    def fake_load_dataset(name: str):
        return LoadedDataset(name=name, frame=dataset_map[name].copy(), source_path=Path(name), metadata={})

    monkeypatch.setattr("mcp_server.links.load_dataset", fake_load_dataset)

    result = link_compound_to_nordic_plants("unknown")

    # A compound absent from COCONUT should produce empty results rather than an error.
    assert result["organisms"] == []
    assert result["table"] == []