Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
conftest.py (27.8 kB)
from datetime import datetime, timezone
from typing import Any

import pytest
from sqlalchemy import insert

from phoenix.db import models
from phoenix.server.types import DbSessionFactory

# NOTE(review): most fixtures below insert rows with hard-coded primary keys,
# and several of them reuse the same ids (e.g. Dataset id=1 appears in
# `empty_dataset` and in every `playground_*` fixture). Presumably those
# fixtures cannot be combined against the same database -- confirm before
# requesting more than one of them in a single test.
#
# NOTE(review): `datetime.now()` is called without a timezone throughout, so
# those values are timezone-naive -- confirm how the models treat naive values.


@pytest.fixture
async def span_data_with_documents(db: DbSessionFactory) -> None:
    """
    Inserts a project named "default", one trace in that project and one span
    in that trace. The span has status "ERROR", carries two retrieval
    documents ("zero" and "one") in its attributes and a single "exception"
    event.
    """
    async with db() as session:
        project = models.Project(name="default")
        session.add(project)
        # Flush before reading project.id for the trace below.
        await session.flush()

        trace = models.Trace(
            project_rowid=project.id,
            trace_id="61d6af1c1765cf22f5d0454d30a09be7",
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        session.add(trace)
        # Flush before reading trace.id for the span below.
        await session.flush()

        span = models.Span(
            trace_rowid=trace.id,
            span_id="f2fbba1d7911049c",
            name="foo",
            span_kind="bar",
            start_time=datetime.now(),
            end_time=datetime.now(),
            attributes={
                "retrieval": {
                    "documents": [
                        {"document": {"content": "zero"}},
                        {"document": {"content": "one"}},
                    ]
                }
            },
            events=[
                {
                    "name": "exception",
                    "timestamp": datetime.now(),
                    "exception.message": "uh-oh",
                }
            ],
            status_code="ERROR",
            status_message="no",
            cumulative_error_count=1,
            cumulative_llm_token_count_prompt=0,
            cumulative_llm_token_count_completion=0,
        )
        session.add(span)
        await session.flush()


@pytest.fixture
async def simple_dataset(db: DbSessionFactory) -> None:
    """
    A dataset with one example added in one version.

    All rows use id 0: Dataset 0, DatasetVersion 0, DatasetExample 0 and a
    single CREATE revision 0.
    """
    async with db() as session:
        dataset = models.Dataset(
            id=0,
            name="simple dataset",
            description=None,
            metadata_={"info": "a test dataset"},
        )
        session.add(dataset)
        await session.flush()

        dataset_version_0 = models.DatasetVersion(
            id=0,
            dataset_id=0,
            description="the first version",
            metadata_={"info": "gotta get some test data somewhere"},
        )
        session.add(dataset_version_0)
        await session.flush()

        example_0 = models.DatasetExample(
            id=0,
            dataset_id=0,
        )
        session.add(example_0)
        await session.flush()

        example_0_revision_0 = models.DatasetExampleRevision(
            id=0,
            dataset_example_id=0,
            dataset_version_id=0,
            input={"in": "foo"},
            output={"out": "bar"},
            metadata_={"info": "the first reivision"},
            revision_kind="CREATE",
        )
        session.add(example_0_revision_0)
        await session.flush()


@pytest.fixture
async def empty_dataset(db: DbSessionFactory) -> None:
    """
    A dataset with three versions, where two examples are added, patched, then
    deleted.

    Rows: Dataset 1; DatasetVersions 1 (CREATE), 2 (PATCH), 3 (DELETE);
    DatasetExamples 1 and 2; DatasetExampleRevisions 1-6 (one per example per
    version).
    """
    async with db() as session:
        dataset = models.Dataset(
            id=1,
            name="empty dataset",
            description="emptied after two revisions",
            metadata_={},
        )
        session.add(dataset)
        await session.flush()

        # Version 1: both examples are created.
        dataset_version_1 = models.DatasetVersion(
            id=1,
            dataset_id=1,
            description="data gets added",
            metadata_={"info": "gotta get some test data somewhere"},
        )
        session.add(dataset_version_1)
        await session.flush()

        example_1 = models.DatasetExample(
            id=1,
            dataset_id=1,
        )
        session.add(example_1)
        await session.flush()

        example_2 = models.DatasetExample(
            id=2,
            dataset_id=1,
        )
        session.add(example_2)
        await session.flush()

        example_1_revision_1 = models.DatasetExampleRevision(
            id=1,
            dataset_example_id=1,
            dataset_version_id=1,
            input={"in": "foo"},
            output={"out": "bar"},
            metadata_={"info": "first revision"},
            revision_kind="CREATE",
        )
        session.add(example_1_revision_1)
        await session.flush()

        example_2_revision_1 = models.DatasetExampleRevision(
            id=2,
            dataset_example_id=2,
            dataset_version_id=1,
            input={"in": "foofoo"},
            output={"out": "barbar"},
            metadata_={"info": "first revision"},
            revision_kind="CREATE",
        )
        session.add(example_2_revision_1)
        await session.flush()

        # Version 2: both examples are patched to upper-case values.
        dataset_version_2 = models.DatasetVersion(
            id=2,
            dataset_id=1,
            description="data gets patched",
            metadata_={"info": "all caps patch"},
        )
        session.add(dataset_version_2)
        await session.flush()

        example_1_revision_2 = models.DatasetExampleRevision(
            id=3,
            dataset_example_id=1,
            dataset_version_id=2,
            input={"in": "FOO"},
            output={"out": "BAR"},
            metadata_={"info": "all caps revision"},
            revision_kind="PATCH",
        )
        session.add(example_1_revision_2)
        await session.flush()

        example_2_revision_2 = models.DatasetExampleRevision(
            id=4,
            dataset_example_id=2,
            dataset_version_id=2,
            input={"in": "FOOFOO"},
            output={"out": "BARBAR"},
            metadata_={"info": "all caps revision"},
            revision_kind="PATCH",
        )
        session.add(example_2_revision_2)
        await session.flush()

        # Version 3: both examples are deleted (empty input/output).
        dataset_version_3 = models.DatasetVersion(
            id=3,
            dataset_id=1,
            description="data gets deleted",
            metadata_={"info": "all gone"},
        )
        session.add(dataset_version_3)
        await session.flush()

        example_1_revision_3 = models.DatasetExampleRevision(
            id=5,
            dataset_example_id=1,
            dataset_version_id=3,
            input={},
            output={},
            metadata_={"info": "all caps revision"},
            revision_kind="DELETE",
        )
        session.add(example_1_revision_3)
        await session.flush()

        example_2_revision_3 = models.DatasetExampleRevision(
            id=6,
            dataset_example_id=2,
            dataset_version_id=3,
            input={},
            output={},
            metadata_={"info": "all caps revision"},
            revision_kind="DELETE",
        )
        session.add(example_2_revision_3)
        await session.flush()


@pytest.fixture
async def dataset_with_revisions(db: DbSessionFactory) -> None:
    """
    A dataset with six versions (ids 4-9, created one second apart).

    - Version 4: examples 3 and 4 are created.
    - Version 5: example 4 is patched and example 5 is created.
    - Versions 6-9 alternate between creating and deleting an example:
      example 6 is created in version 6 and deleted in version 7, example 7 is
      created in version 8 and deleted in version 9.
    """
    async with db() as session:
        dataset = models.Dataset(
            id=2,
            name="revised dataset",
            description="this dataset grows over time",
            metadata_={},
        )
        session.add(dataset)
        await session.flush()

        dataset_version_4 = models.DatasetVersion(
            id=4,
            dataset_id=2,
            description="data gets added",
            metadata_={"info": "gotta get some test data somewhere"},
            created_at=datetime.fromisoformat("2024-05-28T00:00:04+00:00"),
        )
        session.add(dataset_version_4)
        await session.flush()

        example_3 = models.DatasetExample(
            id=3,
            dataset_id=2,
        )
        session.add(example_3)
        await session.flush()

        example_4 = models.DatasetExample(
            id=4,
            dataset_id=2,
        )
        session.add(example_4)
        await session.flush()

        example_3_revision_4 = models.DatasetExampleRevision(
            id=7,
            dataset_example_id=3,
            dataset_version_id=4,
            input={"in": "foo"},
            output={"out": "bar"},
            metadata_={"info": "first revision"},
            revision_kind="CREATE",
        )
        session.add(example_3_revision_4)
        await session.flush()

        example_4_revision_4 = models.DatasetExampleRevision(
            id=8,
            dataset_example_id=4,
            dataset_version_id=4,
            input={"in": "foofoo"},
            output={"out": "barbar"},
            metadata_={"info": "first revision"},
            revision_kind="CREATE",
        )
        session.add(example_4_revision_4)
        await session.flush()

        # The remaining versions are inserted up front; the examples and
        # revisions that reference them are added further below.
        dataset_version_5 = models.DatasetVersion(
            id=5,
            dataset_id=2,
            description="data gets patched and added",
            metadata_={},
            created_at=datetime.fromisoformat("2024-05-28T00:00:05+00:00"),
        )
        session.add(dataset_version_5)
        await session.flush()

        dataset_version_6 = models.DatasetVersion(
            id=6,
            dataset_id=2,
            description="datum gets created",
            metadata_={},
            created_at=datetime.fromisoformat("2024-05-28T00:00:06+00:00"),
        )
        session.add(dataset_version_6)
        await session.flush()

        dataset_version_7 = models.DatasetVersion(
            id=7,
            dataset_id=2,
            description="datum gets deleted",
            metadata_={},
            created_at=datetime.fromisoformat("2024-05-28T00:00:07+00:00"),
        )
        session.add(dataset_version_7)
        await session.flush()

        dataset_version_8 = models.DatasetVersion(
            id=8,
            dataset_id=2,
            description="datum gets created",
            metadata_={},
            created_at=datetime.fromisoformat("2024-05-28T00:00:08+00:00"),
        )
        session.add(dataset_version_8)
        await session.flush()

        dataset_version_9 = models.DatasetVersion(
            id=9,
            dataset_id=2,
            description="datum gets deleted",
            metadata_={},
            created_at=datetime.fromisoformat("2024-05-28T00:00:09+00:00"),
        )
        session.add(dataset_version_9)
        await session.flush()

        example_5 = models.DatasetExample(
            id=5,
            dataset_id=2,
        )
        session.add(example_5)
        await session.flush()

        example_6 = models.DatasetExample(
            id=6,
            dataset_id=2,
        )
        session.add(example_6)
        await session.flush()

        example_7 = models.DatasetExample(
            id=7,
            dataset_id=2,
        )
        session.add(example_7)
        await session.flush()

        # Version 5: patch example 4, create example 5.
        example_4_revision_5 = models.DatasetExampleRevision(
            id=9,
            dataset_example_id=4,
            dataset_version_id=5,
            input={"in": "updated foofoo"},
            output={"out": "updated barbar"},
            metadata_={"info": "updating revision"},
            revision_kind="PATCH",
        )
        session.add(example_4_revision_5)
        await session.flush()

        example_5_revision_5 = models.DatasetExampleRevision(
            id=10,
            dataset_example_id=5,
            dataset_version_id=5,
            input={"in": "look at me"},
            output={"out": "i have all the answers"},
            metadata_={"info": "a new example"},
            revision_kind="CREATE",
        )
        session.add(example_5_revision_5)
        await session.flush()

        # Versions 6 and 7: example 6 is created, then deleted.
        example_6_revision_6 = models.DatasetExampleRevision(
            id=11,
            dataset_example_id=example_6.id,
            dataset_version_id=dataset_version_6.id,
            input={"in": "look at us"},
            output={"out": "we have all the answers"},
            metadata_={"info": "a new example"},
            revision_kind="CREATE",
        )
        session.add(example_6_revision_6)
        await session.flush()

        example_6_revision_7 = models.DatasetExampleRevision(
            id=12,
            dataset_example_id=example_6.id,
            dataset_version_id=dataset_version_7.id,
            input={"in": "look at us"},
            output={"out": "we have all the answers"},
            metadata_={"info": "a new example"},
            revision_kind="DELETE",
        )
        session.add(example_6_revision_7)
        await session.flush()

        # Versions 8 and 9: example 7 is created, then deleted.
        example_7_revision_8 = models.DatasetExampleRevision(
            id=13,
            dataset_example_id=example_7.id,
            dataset_version_id=dataset_version_8.id,
            input={"in": "look at me"},
            output={"out": "i have all the answers"},
            metadata_={"info": "a newer example"},
            revision_kind="CREATE",
        )
        session.add(example_7_revision_8)
        await session.flush()

        example_7_revision_9 = models.DatasetExampleRevision(
            id=14,
            dataset_example_id=example_7.id,
            dataset_version_id=dataset_version_9.id,
            input={"in": "look at me"},
            output={"out": "i have all the answers"},
            metadata_={"info": "a newer example"},
            revision_kind="DELETE",
        )
        session.add(example_7_revision_9)
        await session.flush()


@pytest.fixture
async def dataset_with_experiments_without_runs(
    db: DbSessionFactory,
    empty_dataset: Any,
) -> None:
    """
    Adds two experiments on top of `empty_dataset` (Dataset 1), without any
    runs: experiment 0 on dataset version 1 and experiment 1 on dataset
    version 2.
    """
    async with db() as session:
        experiment_0 = models.Experiment(
            id=0,
            dataset_id=1,
            dataset_version_id=1,
            name="test",
            repetitions=1,
            project_name="default",
            metadata_={"info": "a test experiment"},
        )
        session.add(experiment_0)
        await session.flush()

        experiment_1 = models.Experiment(
            id=1,
            dataset_id=1,
            dataset_version_id=2,
            name="second test",
            repetitions=1,
            project_name="random",
            metadata_={"info": "a second test experiment"},
        )
        session.add(experiment_1)
        await session.flush()


@pytest.fixture
async def dataset_with_experiments_and_runs(
    db: DbSessionFactory,
    dataset_with_experiments_without_runs: Any,
) -> None:
    """
    Adds four experiment runs on top of `dataset_with_experiments_without_runs`:
    one run per dataset example (1 and 2) for each experiment (0 and 1).
    Runs 0-2 have an output and no error; run 3 has no output and an error
    message.
    """
    async with db() as session:
        experiment_run_0 = models.ExperimentRun(
            id=0,
            experiment_id=0,
            dataset_example_id=1,
            output={"out": "barr"},
            repetition_number=1,
            start_time=datetime.now(),
            end_time=datetime.now(),
            error=None,
        )
        session.add(experiment_run_0)
        await session.flush()

        experiment_run_1 = models.ExperimentRun(
            id=1,
            experiment_id=0,
            dataset_example_id=2,
            output={"out": "barbarr"},
            repetition_number=1,
            start_time=datetime.now(),
            end_time=datetime.now(),
            error=None,
        )
        session.add(experiment_run_1)
        await session.flush()

        experiment_run_2 = models.ExperimentRun(
            id=2,
            experiment_id=1,
            dataset_example_id=1,
            output={"out": "bar"},
            repetition_number=1,
            start_time=datetime.now(),
            end_time=datetime.now(),
            error=None,
        )
        session.add(experiment_run_2)
        await session.flush()

        # The only failed run: no output, error message set.
        experiment_run_3 = models.ExperimentRun(
            id=3,
            experiment_id=1,
            dataset_example_id=2,
            output=None,
            repetition_number=1,
            start_time=datetime.now(),
            end_time=datetime.now(),
            error="something funny happened",
        )
        session.add(experiment_run_3)
        await session.flush()


@pytest.fixture
async def dataset_with_experiments_runs_and_evals(
    db: DbSessionFactory,
    dataset_with_experiments_and_runs: Any,
) -> None:
    """
    Adds one "LLM" annotation per experiment run (ids 0-3) on top of
    `dataset_with_experiments_and_runs`. Annotations 0-2 carry a score and no
    error; annotation 3 has no score and an error message.
    """
    async with db() as session:
        experiment_evaluation_0 = models.ExperimentRunAnnotation(
            id=0,
            experiment_run_id=0,
            name="test",
            annotator_kind="LLM",
            label="test",
            score=0.8,
            explanation="test",
            error=None,
            metadata_={"info": "a test evaluation"},
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        session.add(experiment_evaluation_0)
        await session.flush()

        experiment_evaluation_1 = models.ExperimentRunAnnotation(
            id=1,
            experiment_run_id=1,
            name="test",
            annotator_kind="LLM",
            label="test",
            score=0.9,
            explanation="test",
            error=None,
            metadata_={"info": "a test evaluation"},
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        session.add(experiment_evaluation_1)
        await session.flush()

        experiment_evaluation_2 = models.ExperimentRunAnnotation(
            id=2,
            experiment_run_id=2,
            name="second experiment",
            annotator_kind="LLM",
            label="test2",
            score=1,
            explanation="test",
            error=None,
            metadata_={"info": "a test evaluation"},
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        session.add(experiment_evaluation_2)
        await session.flush()

        # The only failed evaluation: no score, error message set.
        experiment_evaluation_3 = models.ExperimentRunAnnotation(
            id=3,
            experiment_run_id=3,
            name="experiment",
            annotator_kind="LLM",
            label="test2",
            score=None,
            explanation="test",
            error="something funnier happened",
            metadata_={"info": "a test evaluation"},
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        session.add(experiment_evaluation_3)
        await session.flush()


@pytest.fixture
async def dataset_with_messages(
    db: DbSessionFactory,
) -> tuple[int, int]:
    """
    Inserts a dataset named "xyz" with one version and two examples whose
    input and output are chat-style ``{"messages": [...]}`` payloads.

    Unlike the other fixtures, no primary keys are hard-coded; ids are read
    back from the ``INSERT ... RETURNING`` statements.

    Returns:
        A ``(dataset_id, dataset_version_id)`` tuple.
    """
    async with db() as session:
        dataset_id = await session.scalar(
            insert(models.Dataset).returning(models.Dataset.id),
            [{"name": "xyz", "metadata_": {}}],
        )
        dataset_version_id = await session.scalar(
            insert(models.DatasetVersion).returning(models.DatasetVersion.id),
            [{"dataset_id": dataset_id, "metadata_": {}}],
        )
        dataset_example_ids = list(
            await session.scalars(
                insert(models.DatasetExample).returning(models.DatasetExample.id),
                [{"dataset_id": dataset_id}, {"dataset_id": dataset_id}],
            )
        )
        await session.scalar(
            insert(models.DatasetExampleRevision).returning(models.DatasetExampleRevision.id),
            [
                {
                    "revision_kind": "CREATE",
                    "dataset_example_id": dataset_example_ids[0],
                    "dataset_version_id": dataset_version_id,
                    "input": {
                        "messages": [
                            {"role": "system", "content": "x"},
                            {"role": "user", "content": "y"},
                        ]
                    },
                    "output": {
                        "messages": [
                            {"role": "assistant", "content": "z"},
                        ]
                    },
                    "metadata_": {},
                },
                {
                    "revision_kind": "CREATE",
                    "dataset_example_id": dataset_example_ids[1],
                    "dataset_version_id": dataset_version_id,
                    "input": {
                        "messages": [
                            {"role": "system", "content": "xx"},
                            {"role": "user", "content": "yy"},
                        ]
                    },
                    "output": {
                        "messages": [
                            {"role": "assistant", "content": "zz"},
                        ]
                    },
                    "metadata_": {},
                },
            ],
        )
        # Checked before returning so that the declared return type of
        # tuple[int, int] holds.
        assert dataset_id is not None
        assert dataset_version_id is not None
        return dataset_id, dataset_version_id


@pytest.fixture
async def playground_dataset_with_patch_revision(db: DbSessionFactory) -> None:
    """
    A dataset (id 1) with two versions (ids 1, 2) and three examples
    (ids 1, 2, 3).

    Revisions inserted (ids are not set explicitly):

    - example 1, version 1: kind CREATE, input ``{"city": "Paris"}``
    - example 2, version 1: kind CREATE, input ``{"city": "Tokyo"}``
    - example 1, version 2: kind CREATE, input ``{"city": "Cairo"}``
    - example 3, version 1: kind PATCH, input ``{"cities": "Madrid"}``

    NOTE(review): the fixture name suggests the second-version revision of
    example 1 is meant to be the patch, but it is inserted with kind CREATE
    while example 3's only revision has kind PATCH (and uses the key "cities"
    instead of "city"). Confirm against the tests that use this fixture
    whether this is intentional before changing it.
    """
    dataset = models.Dataset(
        id=1,
        name="dataset-name",
        metadata_={},
    )
    versions = [
        models.DatasetVersion(
            id=1,
            dataset_id=dataset.id,
            metadata_={},
        ),
        models.DatasetVersion(
            id=2,
            dataset_id=dataset.id,
            metadata_={},
        ),
    ]
    examples = [
        models.DatasetExample(
            id=1,
            dataset_id=dataset.id,
            created_at=datetime(year=2020, month=1, day=1, hour=0, minute=0, tzinfo=timezone.utc),
        ),
        models.DatasetExample(
            id=2,
            dataset_id=dataset.id,
            created_at=datetime(year=2020, month=2, day=2, hour=0, minute=0, tzinfo=timezone.utc),
        ),
        models.DatasetExample(
            id=3,
            dataset_id=dataset.id,
            created_at=datetime(year=2020, month=2, day=3, hour=0, minute=0, tzinfo=timezone.utc),
        ),
    ]
    revisions = [
        models.DatasetExampleRevision(
            dataset_example_id=examples[0].id,
            dataset_version_id=versions[0].id,
            input={"city": "Paris"},
            output={},
            metadata_={},
            revision_kind="CREATE",
        ),
        models.DatasetExampleRevision(
            dataset_example_id=examples[1].id,
            dataset_version_id=versions[0].id,
            input={"city": "Tokyo"},
            output={},
            metadata_={},
            revision_kind="CREATE",
        ),
        models.DatasetExampleRevision(
            dataset_example_id=examples[0].id,
            dataset_version_id=versions[1].id,
            input={"city": "Cairo"},
            output={},
            metadata_={},
            revision_kind="CREATE",
        ),
        models.DatasetExampleRevision(
            dataset_example_id=examples[2].id,
            dataset_version_id=versions[0].id,
            input={"cities": "Madrid"},
            output={},
            metadata_={},
            revision_kind="PATCH",
        ),
    ]
    # Rows are added parent-first, with a flush after each group.
    async with db() as session:
        session.add(dataset)
        await session.flush()
        session.add_all(versions)
        await session.flush()
        session.add_all(examples)
        await session.flush()
        session.add_all(revisions)
        await session.flush()


@pytest.fixture
def cities_and_countries() -> list[tuple[str, str]]:
    """
    Returns a fixed list of 18 ``(city, country)`` pairs.
    """
    return [
        ("Toronto", "Canada"),
        ("Vancouver", "Canada"),
        ("Paris", "France"),
        ("Lyon", "France"),
        ("Berlin", "Germany"),
        ("Munich", "Germany"),
        ("Tokyo", "Japan"),
        ("Osaka", "Japan"),
        ("Sydney", "Australia"),
        ("Melbourne", "Australia"),
        ("Guadalajara", "Mexico"),
        ("Moscow", "Russia"),
        ("Beijing", "China"),
        ("Shanghai", "China"),
        ("Mumbai", "India"),
        ("Delhi", "India"),
        ("Seoul", "South Korea"),
        ("Busan", "South Korea"),
    ]


@pytest.fixture
async def playground_city_and_country_dataset(
    cities_and_countries: list[tuple[str, str]], db: DbSessionFactory
) -> None:
    """
    A dataset with many examples: one example per ``(city, country)`` pair
    from `cities_and_countries`, all created in a single version.

    Example ids run from 1 to ``len(cities_and_countries)``; each example gets
    one CREATE revision with input ``{"city": city}`` and output
    ``{"country": country}``.
    """
    dataset = models.Dataset(
        id=1,
        name="dataset-name",
        metadata_={},
    )
    version = models.DatasetVersion(
        id=1,
        dataset_id=dataset.id,
        metadata_={},
    )
    examples = [
        models.DatasetExample(
            id=example_id,
            dataset_id=dataset.id,
            created_at=datetime(year=2020, month=1, day=1, hour=0, minute=0, tzinfo=timezone.utc),
        )
        for example_id in range(1, len(cities_and_countries) + 1)
    ]
    revisions = [
        models.DatasetExampleRevision(
            dataset_example_id=example.id,
            dataset_version_id=version.id,
            input={"city": city},
            output={"country": country},
            metadata_={},
            revision_kind="CREATE",
        )
        for example, (city, country) in zip(examples, cities_and_countries)
    ]
    async with db() as session:
        session.add(dataset)
        session.add(version)
        session.add_all(examples)
        await session.flush()
        session.add_all(revisions)
        await session.flush()


@pytest.fixture
async def playground_dataset_with_splits(db: DbSessionFactory) -> None:
    """
    A dataset with examples assigned to different splits for testing
    split-based filtering.

    Setup:
    - Dataset with 5 examples
    - 2 splits: "train" (examples 1, 2, 3) and "test" (examples 4, 5)
    - Example 3 is intentionally in only train split
    """
    dataset = models.Dataset(
        id=1,
        name="dataset-with-splits",
        metadata_={},
    )
    version = models.DatasetVersion(
        id=1,
        dataset_id=dataset.id,
        metadata_={},
    )
    # Create 5 examples
    examples = [
        models.DatasetExample(
            id=i,
            dataset_id=dataset.id,
            created_at=datetime(year=2020, month=1, day=i, hour=0, minute=0, tzinfo=timezone.utc),
        )
        for i in range(1, 6)
    ]
    # Create revisions for each example
    cities = ["Paris", "Tokyo", "Berlin", "London", "Madrid"]
    revisions = [
        models.DatasetExampleRevision(
            dataset_example_id=example.id,
            dataset_version_id=version.id,
            input={"city": city},
            output={},
            metadata_={},
            revision_kind="CREATE",
        )
        for example, city in zip(examples, cities)
    ]
    # Create two splits
    train_split = models.DatasetSplit(
        id=1,
        name="train",
        description="Training split",
        color="#0000FF",
        metadata_={},
    )
    test_split = models.DatasetSplit(
        id=2,
        name="test",
        description="Test split",
        color="#FF0000",
        metadata_={},
    )
    # Assign examples to splits
    # Train split: examples 1, 2, 3
    # Test split: examples 4, 5
    split_assignments = [
        models.DatasetSplitDatasetExample(dataset_split_id=1, dataset_example_id=1),
        models.DatasetSplitDatasetExample(dataset_split_id=1, dataset_example_id=2),
        models.DatasetSplitDatasetExample(dataset_split_id=1, dataset_example_id=3),
        models.DatasetSplitDatasetExample(dataset_split_id=2, dataset_example_id=4),
        models.DatasetSplitDatasetExample(dataset_split_id=2, dataset_example_id=5),
    ]
    # The split assignments are added last, after the examples and splits
    # they reference have been flushed.
    async with db() as session:
        session.add(dataset)
        session.add(version)
        session.add_all(examples)
        await session.flush()
        session.add_all(revisions)
        session.add_all([train_split, test_split])
        await session.flush()
        session.add_all(split_assignments)
        await session.flush()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.