import sys
from pathlib import Path
import pytest
# Directory one level above the folder containing this file
# (parents[0] is this file's own directory, parents[1] is its parent).
ROOT = Path(__file__).resolve().parents[1]
# Prepend ROOT to the import search path, once, before the `mcp_server`
# import that follows — presumably so the package resolves without being
# installed; confirm against the repository layout.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from mcp_server.datasets import ( # noqa: E402
DatasetLoadError,
get_metadata,
load_dataset,
metadata_keys,
)
def test_metadata_keys_contains_known_dataset():
    """Check that ``metadata_keys()`` lists the Complex I inhibitors dataset."""
    assert "all_mito_complex_I_inhibitors.txt" in metadata_keys()
def test_get_dataset_metadata_returns_expected_title():
    """Check that metadata exists for the dataset and its title has the expected prefix."""
    entry = get_metadata("all_mito_complex_I_inhibitors.txt")
    # Fail with a clear assertion rather than a TypeError on the lookup below.
    assert entry is not None
    title = entry["title"]
    assert title.startswith("All mitochondrial Complex I inhibitors")
def test_load_dataset_parses_tab_delimited_file():
    """Check that the Complex I inhibitors dataset loads with its expected columns.

    The loaded frame must be non-empty and must expose at least the
    ``compound``, ``pubmed_references`` and ``known_status`` columns.
    (Per the test name the source file is tab-delimited; the format itself
    is not asserted here.)
    """
    dataset = load_dataset("all_mito_complex_I_inhibitors.txt")
    assert not dataset.frame.empty
    # Set literal instead of set([...]): same value, no throwaway list.
    expected_columns = {"compound", "pubmed_references", "known_status"}
    assert expected_columns.issubset(dataset.frame.columns)
def test_load_dataset_handles_newline_separated_file():
    """Check that ``plants_genera.txt`` loads with a ``genus`` column and more than 10 rows."""
    frame = load_dataset("plants_genera.txt").frame
    assert "genus" in frame.columns
    assert 10 < len(frame)
def test_load_dataset_raises_for_git_lfs_pointer():
    """Check that loading the COCONUT CSV raises ``DatasetLoadError``.

    Per the test name, this file is expected to be a Git LFS pointer rather
    than real data; that is not verified here.
    """
    pointer_file = "coconut_csv-09-2025_FI_NO_plants.csv"
    with pytest.raises(DatasetLoadError):
        load_dataset(pointer_file)