import { css } from "@emotion/react";
import {
ColumnDef,
flexRender,
getCoreRowModel,
Table,
useReactTable,
} from "@tanstack/react-table";
import { useVirtualizer } from "@tanstack/react-virtual";
import {
memo,
type ReactNode,
type RefObject,
SetStateAction,
Suspense,
useCallback,
useEffect,
useMemo,
useRef,
useState,
} from "react";
import {
Disposable,
graphql,
useLazyLoadQuery,
useMutation,
usePaginationFragment,
useRelayEnvironment,
} from "react-relay";
import { useSearchParams } from "react-router";
import {
GraphQLSubscriptionConfig,
PayloadError,
requestSubscription,
} from "relay-runtime";
import {
Flex,
Icon,
IconButton,
Icons,
Modal,
ModalOverlay,
ParagraphSkeleton,
ProgressCircle,
Text,
View,
} from "@phoenix/components";
import { AlphabeticIndexIcon } from "@phoenix/components/AlphabeticIndexIcon";
import type { AnnotationConfig } from "@phoenix/components/annotation";
import { DynamicContent } from "@phoenix/components/DynamicContent";
import {
type AnnotationError,
type AnnotationWithTrace,
calculateAnnotationListHeight,
calculateEstimatedRowHeight,
CELL_PRIMARY_CONTENT_HEIGHT,
ConnectedExperimentAnnotationAggregates,
ConnectedExperimentCostAndLatencySummary,
ExperimentAnnotationAggregates,
ExperimentAnnotationAggregatesSkeleton,
ExperimentCostAndLatencySummary,
ExperimentCostAndLatencySummarySkeleton,
ExperimentInputCell,
ExperimentReferenceOutputCell,
ExperimentRunCellAnnotationsList,
} from "@phoenix/components/experiment";
import { CellTop, OverflowCell } from "@phoenix/components/table";
import { borderedTableCSS, tableCSS } from "@phoenix/components/table/styles";
import { TableEmpty } from "@phoenix/components/table/TableEmpty";
import {
Tooltip,
TooltipArrow,
TooltipTrigger,
} from "@phoenix/components/tooltip";
import { SpanTokenCosts } from "@phoenix/components/trace";
import { LatencyText } from "@phoenix/components/trace/LatencyText";
import { SpanTokenCount } from "@phoenix/components/trace/SpanTokenCount";
import { SELECTED_SPAN_NODE_ID_PARAM } from "@phoenix/constants/searchParams";
import { useNotifyError } from "@phoenix/contexts";
import { useCredentialsContext } from "@phoenix/contexts/CredentialsContext";
import {
usePlaygroundContext,
usePlaygroundStore,
} from "@phoenix/contexts/PlaygroundContext";
import {
assertUnreachable,
isStringArray,
isStringKeyedObject,
} from "@phoenix/typeUtils";
import {
getErrorMessagesFromRelayMutationError,
getErrorMessagesFromRelaySubscriptionError,
} from "@phoenix/utils/errorUtils";
import {
extractPathsFromDatasetExamples,
getValueAtPath,
} from "@phoenix/utils/objectUtils";
import { ExperimentCompareDetailsDialog } from "../experiment/ExperimentCompareDetailsDialog";
import { ExperimentRepetitionSelector } from "../experiment/ExperimentRepetitionSelector";
import type { PlaygroundDatasetExamplesTableFragment$key } from "./__generated__/PlaygroundDatasetExamplesTableFragment.graphql";
import {
PlaygroundDatasetExamplesTableMutation as PlaygroundDatasetExamplesTableMutationType,
PlaygroundDatasetExamplesTableMutation$data,
} from "./__generated__/PlaygroundDatasetExamplesTableMutation.graphql";
import { PlaygroundDatasetExamplesTableQuery } from "./__generated__/PlaygroundDatasetExamplesTableQuery.graphql";
import { PlaygroundDatasetExamplesTableRefetchQuery } from "./__generated__/PlaygroundDatasetExamplesTableRefetchQuery.graphql";
import PlaygroundDatasetExamplesTableSubscription, {
EvaluatorInputMappingInput,
PlaygroundDatasetExamplesTableSubscription as PlaygroundDatasetExamplesTableSubscriptionType,
PlaygroundDatasetExamplesTableSubscription$data,
} from "./__generated__/PlaygroundDatasetExamplesTableSubscription.graphql";
import {
InstanceVariablesProvider,
useInstanceVariables,
} from "./InstanceVariablesContext";
import {
EvaluationChunk,
ExampleRunData,
InstanceResponses,
makeExpandedCellKey,
type Span,
usePlaygroundDatasetExamplesTableContext,
} from "./PlaygroundDatasetExamplesTableContext";
import { PlaygroundErrorWrap } from "./PlaygroundErrorWrap";
import { PlaygroundInstanceProgressIndicator } from "./PlaygroundInstanceProgressIndicator";
import { PlaygroundRunTraceDetailsDialog } from "./PlaygroundRunTraceDialog";
import {
PartialOutputToolCall,
PlaygroundToolCall,
} from "./PlaygroundToolCall";
import {
extractRootVariable,
getChatCompletionOverDatasetInput,
} from "./playgroundUtils";
const PAGE_SIZE = 10;
/**
* Maximum number of dataset examples to sample for extracting available paths
* for template variable autocomplete. Higher values provide more complete
* autocomplete suggestions but increase computation time.
*/
const MAX_EXAMPLES_FOR_PATH_EXTRACTION = 10;
const outputContentCSS = css`
flex: none;
padding: var(--global-dimension-size-200);
`;
/**
* Get possible variable names based on the template variables path.
*
* @param datasetExample - The full dataset example with input, output, and metadata
* @param templateVariablesPath - The path prefix for template variables
* @returns Array of possible variable names
*/
function getPossibleVariablesForPath({
datasetExample,
templateVariablesPath,
}: {
datasetExample: { input: unknown; output: unknown; metadata: unknown };
templateVariablesPath: string | null;
}): string[] {
// TODO: dynamically parse all valid root-level paths from the dataset example
// instead of hardcoding these known keys
const templateVariablesContext = {
input: datasetExample.input,
reference: datasetExample.output,
metadata: datasetExample.metadata,
};
// Look up the object at the path and return its keys
// When path is empty, getValueAtPath returns the whole context object
const targetObject = getValueAtPath(
templateVariablesContext,
templateVariablesPath ?? ""
);
if (isStringKeyedObject(targetObject)) {
return Object.keys(targetObject);
}
return [];
}
type ChatCompletionOverDatasetMutationPayload =
PlaygroundDatasetExamplesTableMutation$data["chatCompletionOverDataset"];
const createExampleResponsesForInstance = (
response: ChatCompletionOverDatasetMutationPayload
): InstanceResponses => {
return response.examples.reduce<InstanceResponses>(
(instanceResponses, example) => {
const {
datasetExampleId,
repetitionNumber,
experimentRunId,
repetition,
} = example;
const { errorMessage, content, span, toolCalls, evaluations } =
repetition;
// Transform mutation response evaluations to unified EvaluationChunk format
const transformedEvaluations: EvaluationChunk[] = evaluations.map((e) => {
return {
__typename: "EvaluationChunk" as const,
datasetExampleId,
repetitionNumber,
evaluatorName: e.evaluatorName,
experimentRunEvaluation: e.annotation,
trace: e.trace,
error: e.error,
};
});
const updatedInstanceResponses: InstanceResponses = {
...instanceResponses,
[datasetExampleId]: {
...instanceResponses[datasetExampleId],
[repetitionNumber]: {
...instanceResponses[datasetExampleId]?.[repetitionNumber],
experimentRunId,
span,
content,
errorMessage,
toolCalls:
toolCalls.length > 0
? toolCalls.reduce<Record<string, PartialOutputToolCall>>(
(map, toolCall) => {
map[toolCall.id] = toolCall;
return map;
},
{}
)
: undefined,
evaluations: transformedEvaluations,
},
},
};
return updatedInstanceResponses;
},
{}
);
};
/**
* Displays the status indicator in the cell header based on run state.
* - Completed: Shows latency, token count, and cost stats
* - Generating: Shows progress circle with "Generating..." text
* - Cancelled: Shows stop icon with "Cancelled" text
*/
function CellRunStatus({
span,
isRunning,
}: {
span: Span | null | undefined;
isRunning: boolean;
}) {
if (span) {
return (
<Flex direction="row" gap="size-100" alignItems="center" height="100%">
<LatencyText latencyMs={span.latencyMs || 0} size="S" />
<SpanTokenCount
tokenCountTotal={span.tokenCountTotal || 0}
nodeId={span.id}
/>
<SpanTokenCosts
totalCost={span.costSummary?.total?.cost || 0}
spanNodeId={span.id}
/>
</Flex>
);
}
if (isRunning) {
return (
<Flex direction="row" gap="size-100" alignItems="center">
<ProgressCircle isIndeterminate size="S" aria-label="Generating" />
<Text color="text-500" fontStyle="italic">
Generating...
</Text>
</Flex>
);
}
return (
<Flex
direction="row"
gap="size-100"
alignItems="center"
css={css`
color: var(--global-text-color-500);
`}
>
<Icon svg={<Icons.MinusCircleOutline />} />
<Text color="inherit">Cancelled</Text>
</Flex>
);
}
function EmptyExampleOutput({
isRunning,
instanceVariables,
datasetExample,
templateVariablesPath,
evaluatorOutputConfigs,
isExpanded,
onExpandedChange,
}: {
isRunning: boolean;
instanceVariables: string[];
datasetExample: { input: unknown; output: unknown; metadata: unknown };
templateVariablesPath: string | null;
evaluatorOutputConfigs: AnnotationConfig[];
isExpanded: boolean;
onExpandedChange: (isExpanded: boolean) => void;
}) {
// Build the template variables context matching the backend mapping
// (output is renamed to reference)
const templateVariablesContext = useMemo(
() => ({
input: datasetExample.input,
reference: datasetExample.output,
metadata: datasetExample.metadata,
}),
[datasetExample]
);
// Get the target object based on the template variables path
const targetObject = useMemo((): Record<string, unknown> => {
if (!templateVariablesPath) {
return templateVariablesContext as Record<string, unknown>;
}
const value = getValueAtPath(
templateVariablesContext,
templateVariablesPath
);
return isStringKeyedObject(value) ? value : {};
}, [templateVariablesContext, templateVariablesPath]);
// Get possible variables based on the path
const possibleVariables = useMemo(() => {
return getPossibleVariablesForPath({
datasetExample,
templateVariablesPath,
});
}, [datasetExample, templateVariablesPath]);
const missingVariables = useMemo(() => {
// Extract root variable from paths (e.g., "input.input.messages" -> "input")
// and check if the root variable exists in the target object
return instanceVariables.filter((variable) => {
const rootVariable = extractRootVariable(variable);
return targetObject[rootVariable] == null;
});
}, [targetObject, instanceVariables]);
let cellTopContent: ReactNode | null = <Text color="text-500">Ready</Text>;
let content: ReactNode | null = (
<Text color="text-500">Press run to generate</Text>
);
if (isRunning) {
content = <ParagraphSkeleton lines={4} />;
cellTopContent = (
<Flex direction="row" gap="size-100" alignItems="center">
<Icon svg={<Icons.LoaderOutline />} />
<Text color="text-500">Queued</Text>
</Flex>
);
}
if (missingVariables.length > 0) {
cellTopContent = <Text color="danger">Missing variables</Text>;
content = (
<PlaygroundErrorWrap>
{`Dataset is missing input for variable${missingVariables.length > 1 ? "s" : ""}: ${missingVariables.join(
", "
)}.${
possibleVariables.length > 0
? ` Possible inputs start with: ${possibleVariables.join(", ")}`
: " No inputs found in dataset example."
}`}
</PlaygroundErrorWrap>
);
}
return (
<Flex direction="column" height="100%">
<CellTop>{cellTopContent}</CellTop>
<OverflowCell
height={CELL_PRIMARY_CONTENT_HEIGHT}
isExpanded={isExpanded}
onExpandedChange={onExpandedChange}
>
<div css={outputContentCSS}>{content}</div>
</OverflowCell>
<ExperimentRunCellAnnotationsList
annotations={[]}
annotationConfigs={evaluatorOutputConfigs}
executionState={isRunning ? "running" : "idle"}
/>
</Flex>
);
}
function ExampleOutputContent({
exampleData,
repetitionNumber,
setRepetitionNumber,
totalRepetitions,
onViewExperimentRunDetailsPress,
onViewTracePress,
evaluatorOutputConfigs,
isRunning,
isExpanded,
onExpandedChange,
}: {
exampleData: ExampleRunData;
repetitionNumber: number;
setRepetitionNumber: (n: SetStateAction<number>) => void;
totalRepetitions: number;
onViewExperimentRunDetailsPress: () => void;
onViewTracePress: (
traceId: string,
projectId: string,
evaluatorName?: string
) => void;
evaluatorOutputConfigs: AnnotationConfig[];
isRunning: boolean;
isExpanded: boolean;
onExpandedChange: (isExpanded: boolean) => void;
}) {
const {
span,
content,
toolCalls,
errorMessage,
experimentRunId,
evaluations,
} = exampleData;
const hasSpan = span != null;
const hasExperimentRun = experimentRunId != null;
const spanControls = useMemo(() => {
return (
<>
{totalRepetitions > 1 && (
<ExperimentRepetitionSelector
repetitionNumber={repetitionNumber}
totalRepetitions={totalRepetitions}
setRepetitionNumber={setRepetitionNumber}
/>
)}
<TooltipTrigger isDisabled={!hasExperimentRun}>
<IconButton
size="S"
aria-label="View experiment run details"
isDisabled={!hasExperimentRun}
onPress={onViewExperimentRunDetailsPress}
>
<Icon svg={<Icons.ExpandOutline />} />
</IconButton>
<Tooltip>
<TooltipArrow />
view experiment run
</Tooltip>
</TooltipTrigger>
<TooltipTrigger isDisabled={!hasSpan}>
<IconButton
size="S"
aria-label="View run trace"
isDisabled={!hasSpan}
onPress={() => {
if (span) {
onViewTracePress(span.context.traceId, span.project.id);
}
}}
>
<Icon svg={<Icons.Trace />} />
</IconButton>
<Tooltip>
<TooltipArrow />
view run trace
</Tooltip>
</TooltipTrigger>
</>
);
}, [
hasExperimentRun,
hasSpan,
repetitionNumber,
setRepetitionNumber,
span,
totalRepetitions,
onViewExperimentRunDetailsPress,
onViewTracePress,
]);
const { successfulEvaluations, evaluationErrors } = useMemo(() => {
const successful: AnnotationWithTrace[] = [];
const errors: AnnotationError[] = [];
for (const e of evaluations ?? []) {
if (e.error != null) {
errors.push({
evaluatorName: e.evaluatorName,
message: e.error,
trace: e.trace,
});
} else if (e.experimentRunEvaluation != null) {
const evaluation = e.experimentRunEvaluation;
successful.push({
...evaluation,
trace: e.trace,
});
}
}
return { successfulEvaluations: successful, evaluationErrors: errors };
}, [evaluations]);
return (
<Flex direction="column" height="100%">
<CellTop extra={spanControls}>
<CellRunStatus span={span} isRunning={isRunning} />
</CellTop>
<OverflowCell
height={CELL_PRIMARY_CONTENT_HEIGHT}
isExpanded={isExpanded}
onExpandedChange={onExpandedChange}
>
<div css={outputContentCSS}>
<Flex direction={"column"} gap="size-100">
{errorMessage != null ? (
<PlaygroundErrorWrap key="error-message">
{errorMessage}
</PlaygroundErrorWrap>
) : null}
{content != null ? (
<DynamicContent value={content} key="content" />
) : null}
{toolCalls != null && Object.keys(toolCalls).length > 0
? Object.values(toolCalls)
.filter((tc): tc is PartialOutputToolCall => tc != null)
.map((toolCall) => (
<View key={toolCall.id}>
<PlaygroundToolCall toolCall={toolCall} />
</View>
))
: null}
</Flex>
</div>
</OverflowCell>
<ExperimentRunCellAnnotationsList
annotations={successfulEvaluations}
annotationErrors={evaluationErrors}
annotationConfigs={evaluatorOutputConfigs}
executionState={isRunning ? "running" : "idle"}
onTraceClick={({ traceId, projectId, annotationName }) => {
if (traceId && projectId) {
onViewTracePress(traceId, projectId, annotationName);
}
}}
/>
</Flex>
);
}
const MemoizedExampleOutputCell = memo(function ExampleOutputCell({
isRunning,
instanceId,
exampleId,
datasetExample,
templateVariablesPath,
onViewExperimentRunDetailsPress,
onViewTracePress,
evaluatorOutputConfigs,
}: {
instanceId: number;
exampleId: string;
isRunning: boolean;
datasetExample: { input: unknown; output: unknown; metadata: unknown };
templateVariablesPath: string | null;
onViewExperimentRunDetailsPress: () => void;
onViewTracePress: (
traceId: string,
projectId: string,
evaluatorName?: string
) => void;
evaluatorOutputConfigs: AnnotationConfig[];
}) {
const instanceVariables = useInstanceVariables(instanceId);
const [repetitionNumber, setRepetitionNumber] = useState(1);
const totalRepetitions = usePlaygroundDatasetExamplesTableContext(
(state) => state.repetitions
);
const examplesByRepetitionNumber = usePlaygroundDatasetExamplesTableContext(
(store) => store.exampleResponsesMap[instanceId]?.[exampleId]
);
const expandedCellKey = makeExpandedCellKey(
instanceId,
exampleId,
repetitionNumber
);
const isExpanded = usePlaygroundDatasetExamplesTableContext(
(state) => state.expandedCells[expandedCellKey] ?? false
);
const setExpandedCell = usePlaygroundDatasetExamplesTableContext(
(state) => state.setExpandedCell
);
const onExpandedChange = useCallback(
(expanded: boolean) => {
setExpandedCell({
instanceId,
exampleId,
repetitionNumber,
isExpanded: expanded,
});
},
[setExpandedCell, instanceId, exampleId, repetitionNumber]
);
const exampleData = useMemo(() => {
return examplesByRepetitionNumber?.[repetitionNumber];
}, [examplesByRepetitionNumber, repetitionNumber]);
return exampleData == null ? (
<EmptyExampleOutput
isRunning={isRunning}
instanceVariables={instanceVariables}
datasetExample={datasetExample}
templateVariablesPath={templateVariablesPath}
evaluatorOutputConfigs={evaluatorOutputConfigs}
isExpanded={isExpanded}
onExpandedChange={onExpandedChange}
/>
) : (
<ExampleOutputContent
exampleData={exampleData}
repetitionNumber={repetitionNumber}
totalRepetitions={totalRepetitions}
setRepetitionNumber={setRepetitionNumber}
onViewExperimentRunDetailsPress={onViewExperimentRunDetailsPress}
onViewTracePress={onViewTracePress}
evaluatorOutputConfigs={evaluatorOutputConfigs}
isRunning={isRunning}
isExpanded={isExpanded}
onExpandedChange={onExpandedChange}
/>
);
});
// un-memoized normal table body component - see memoized version below
function TableBody<T>({
table,
tableContainerRef,
estimatedRowHeight,
}: {
table: Table<T>;
tableContainerRef: RefObject<HTMLDivElement | null>;
estimatedRowHeight: number;
}) {
"use no memo";
const rows = table.getRowModel().rows;
// eslint-disable-next-line react-hooks/incompatible-library
const virtualizer = useVirtualizer({
count: rows.length,
getScrollElement: () => tableContainerRef.current,
estimateSize: () => estimatedRowHeight,
overscan: 5,
});
const virtualRows = virtualizer.getVirtualItems();
const totalHeight = virtualizer.getTotalSize();
const spacerRowHeight = useMemo(() => {
return totalHeight - virtualRows.reduce((acc, item) => acc + item.size, 0);
}, [totalHeight, virtualRows]);
return (
<tbody>
{virtualRows.map((virtualRow, index) => {
const row = rows[virtualRow.index];
return (
<tr
key={row.id}
style={{
height: `${virtualRow.size}px`,
transform: `translateY(${
virtualRow.start - index * virtualRow.size
}px)`,
}}
>
{row.getVisibleCells().map((cell) => {
return (
<td
key={cell.id}
style={{
padding: 0,
verticalAlign: "top",
width: `calc(var(--col-${cell.column.id}-size) * 1px)`,
maxWidth: `calc(var(--col-${cell.column.id}-size) * 1px)`,
minWidth: 0,
// allow long text with no symbols or spaces to wrap
// otherwise, it will prevent the cell from shrinking
// an alternative solution would be to set a max-width and allow
// the cell to scroll itself
wordBreak: "break-all",
}}
>
{flexRender(cell.column.columnDef.cell, cell.getContext())}
</td>
);
})}
</tr>
);
})}
<tr>
<td
style={{
height: `${spacerRowHeight}px`,
padding: 0,
}}
colSpan={table.getAllColumns().length}
/>
</tr>
</tbody>
);
}
// special memoized wrapper for our table body that we will use during column resizing
export const MemoizedTableBody = memo(
TableBody,
(prev, next) => prev.table.options.data === next.table.options.data
) as typeof TableBody;
export function PlaygroundDatasetExamplesTable({
datasetId,
splitIds,
evaluatorMappings,
evaluatorOutputConfigs,
}: {
datasetId: string;
splitIds?: string[];
evaluatorOutputConfigs: AnnotationConfig[];
/**
* Record of evaluator id to name and input mappings
*/
evaluatorMappings: Record<
string,
{ name: string; inputMapping: EvaluatorInputMappingInput }
>;
}) {
const environment = useRelayEnvironment();
const instances = usePlaygroundContext((state) => state.instances);
const { baseExperimentId, compareExperimentIds } = useMemo(() => {
const experimentIds = instances.map((instance) => instance.experimentId);
const [baseExperimentId, ...compareExperimentIds] = experimentIds;
return { baseExperimentId, compareExperimentIds };
}, [instances]);
const [selectedExampleIndex, setSelectedExampleIndex] = useState<
number | null
>(null);
const [selectedTraceInfo, setSelectedTraceInfo] = useState<{
traceId: string;
projectId: string;
evaluatorName?: string;
} | null>(null);
const playgroundDatasetState = usePlaygroundContext((state) =>
datasetId ? state.stateByDatasetId[datasetId] : null
);
const { templateVariablesPath } = playgroundDatasetState ?? {};
const setAvailablePaths = usePlaygroundContext(
(state) => state.setAvailablePaths
);
const numEnabledEvaluators = evaluatorOutputConfigs.length;
const annotationListHeight =
calculateAnnotationListHeight(numEnabledEvaluators);
const updateInstance = usePlaygroundContext((state) => state.updateInstance);
const updateExampleData = usePlaygroundDatasetExamplesTableContext(
(state) => state.updateExampleData
);
const setExampleDataForInstance = usePlaygroundDatasetExamplesTableContext(
(state) => state.setExampleDataForInstance
);
const resetData = usePlaygroundDatasetExamplesTableContext(
(state) => state.resetData
);
const appendExampleDataToolCallChunk =
usePlaygroundDatasetExamplesTableContext(
(state) => state.appendExampleDataToolCallChunk
);
const appendExampleDataTextChunk = usePlaygroundDatasetExamplesTableContext(
(state) => state.appendExampleDataTextChunk
);
const appendExampleDataEvaluationChunk =
usePlaygroundDatasetExamplesTableContext(
(state) => state.appendExampleDataEvaluationChunk
);
const setRepetitions = usePlaygroundDatasetExamplesTableContext(
(state) => state.setRepetitions
);
const repetitions = usePlaygroundContext((state) => state.repetitions);
const [, setSearchParams] = useSearchParams();
const hasSomeRunIds = instances.some(
(instance) => instance.activeRunId !== null
);
const credentials = useCredentialsContext((state) => state);
const markPlaygroundInstanceComplete = usePlaygroundContext(
(state) => state.markPlaygroundInstanceComplete
);
const playgroundStore = usePlaygroundStore();
const notifyError = useNotifyError();
const { dataset } = useLazyLoadQuery<PlaygroundDatasetExamplesTableQuery>(
graphql`
query PlaygroundDatasetExamplesTableQuery($datasetId: ID!, $splitIds: [ID!]) {
dataset: node(id: $datasetId) {
...PlaygroundDatasetExamplesTableFragment @arguments(splitIds: $splitIds)
... on Dataset {
exampleCount(splitIds: $splitIds)
latestVersions: versions(first: 1, sort: { col: createdAt, dir: desc }) {
edges {
version: node {
id
}
}
}
}
}
}
`,
{ datasetId, splitIds: splitIds ?? null }
);
const exampleCount = dataset.exampleCount ?? 0;
const evaluatorCount = Object.keys(evaluatorMappings).length;
const incrementRunsCompleted = usePlaygroundContext(
(state) => state.incrementRunsCompleted
);
const incrementRunsFailed = usePlaygroundContext(
(state) => state.incrementRunsFailed
);
const incrementEvalsCompleted = usePlaygroundContext(
(state) => state.incrementEvalsCompleted
);
const initExperimentRunProgress = usePlaygroundContext(
(state) => state.initExperimentRunProgress
);
const onNext = useCallback(
(instanceId: number) =>
(response?: PlaygroundDatasetExamplesTableSubscription$data | null) => {
if (response == null) {
return;
}
const chatCompletion = response.chatCompletionOverDataset;
switch (chatCompletion.__typename) {
case "ChatCompletionSubscriptionExperiment":
updateInstance({
instanceId,
patch: { experimentId: chatCompletion.experiment.id },
dirty: null,
});
break;
case "ChatCompletionSubscriptionResult":
if (chatCompletion.datasetExampleId == null) {
return;
}
updateExampleData({
instanceId,
exampleId: chatCompletion.datasetExampleId,
repetitionNumber: chatCompletion.repetitionNumber ?? 1,
patch: {
span: chatCompletion.span,
experimentRunId: chatCompletion.experimentRun?.id,
},
});
incrementRunsCompleted(instanceId);
break;
case "ChatCompletionSubscriptionError":
if (chatCompletion.datasetExampleId == null) {
return;
}
updateExampleData({
instanceId,
exampleId: chatCompletion.datasetExampleId,
repetitionNumber: chatCompletion.repetitionNumber ?? 1,
patch: { errorMessage: chatCompletion.message },
});
incrementRunsFailed(instanceId);
break;
case "TextChunk":
if (chatCompletion.datasetExampleId == null) {
return;
}
appendExampleDataTextChunk({
instanceId,
exampleId: chatCompletion.datasetExampleId,
repetitionNumber: chatCompletion.repetitionNumber ?? 1,
textChunk: chatCompletion.content,
});
break;
case "ToolCallChunk": {
if (chatCompletion.datasetExampleId == null) {
return;
}
appendExampleDataToolCallChunk({
instanceId,
exampleId: chatCompletion.datasetExampleId,
repetitionNumber: chatCompletion.repetitionNumber ?? 1,
toolCallChunk: chatCompletion,
});
break;
}
case "EvaluationChunk": {
if (chatCompletion.datasetExampleId == null) {
return;
}
appendExampleDataEvaluationChunk({
instanceId,
exampleId: chatCompletion.datasetExampleId,
repetitionNumber: chatCompletion.repetitionNumber ?? 1,
evaluationChunk: chatCompletion,
});
incrementEvalsCompleted(instanceId);
break;
}
// This should never happen
// As relay puts it in generated files "This will never be '%other', but we need some value in case none of the concrete values match."
case "%other":
return;
default:
return assertUnreachable(chatCompletion);
}
},
[
appendExampleDataTextChunk,
appendExampleDataToolCallChunk,
appendExampleDataEvaluationChunk,
incrementEvalsCompleted,
incrementRunsCompleted,
incrementRunsFailed,
updateExampleData,
updateInstance,
]
);
const [generateChatCompletion] =
useMutation<PlaygroundDatasetExamplesTableMutationType>(graphql`
mutation PlaygroundDatasetExamplesTableMutation(
$input: ChatCompletionOverDatasetInput!
) {
chatCompletionOverDataset(input: $input) {
experimentId
examples {
datasetExampleId
experimentRunId
repetitionNumber
repetition {
content
errorMessage
span {
id
tokenCountTotal
costSummary {
total {
cost
}
}
latencyMs
project {
id
}
context {
traceId
}
}
toolCalls {
id
function {
name
arguments
}
}
evaluations {
evaluatorName
annotation {
id
name
label
score
annotatorKind
explanation
metadata
startTime
}
trace {
traceId
projectId
}
error
}
}
}
}
}
`);
const onCompleted = useCallback(
(instanceId: number) =>
(
response: PlaygroundDatasetExamplesTableMutation$data,
errors: PayloadError[] | null
) => {
markPlaygroundInstanceComplete(instanceId);
setRepetitions(repetitions);
if (errors) {
notifyError({
title: "Chat completion failed",
message: errors[0].message,
});
return;
}
updateInstance({
instanceId,
patch: {
experimentId: response.chatCompletionOverDataset.experimentId,
},
dirty: null,
});
setExampleDataForInstance({
instanceId,
data: createExampleResponsesForInstance(
response.chatCompletionOverDataset
),
});
},
[
markPlaygroundInstanceComplete,
notifyError,
repetitions,
setExampleDataForInstance,
setRepetitions,
updateInstance,
]
);
useEffect(() => {
if (!hasSomeRunIds) {
return;
}
const { instances, streaming, updateInstance } = playgroundStore.getState();
resetData();
// Calculate total runs and evals for progress tracking
const totalRuns = exampleCount * repetitions;
const totalEvals = exampleCount * repetitions * evaluatorCount;
if (streaming) {
const subscriptions: Disposable[] = [];
for (const instance of instances) {
const { activeRunId } = instance;
updateInstance({
instanceId: instance.id,
patch: { experimentId: null },
dirty: null,
});
if (activeRunId === null) {
continue;
}
// Initialize progress for this instance
initExperimentRunProgress(instance.id, {
totalRuns,
runsCompleted: 0,
runsFailed: 0,
totalEvals,
evalsCompleted: 0,
evalsFailed: 0,
});
const variables = {
input: getChatCompletionOverDatasetInput({
credentials,
instanceId: instance.id,
playgroundStore,
datasetId,
splitIds,
evaluatorMappings,
}),
};
const config: GraphQLSubscriptionConfig<PlaygroundDatasetExamplesTableSubscriptionType> =
{
subscription: PlaygroundDatasetExamplesTableSubscription,
variables,
onNext: onNext(instance.id),
onCompleted: () => {
markPlaygroundInstanceComplete(instance.id);
},
onError: (error) => {
markPlaygroundInstanceComplete(instance.id);
const errorMessages =
getErrorMessagesFromRelaySubscriptionError(error);
if (errorMessages != null && errorMessages.length > 0) {
notifyError({
title: "Failed to get output",
message: errorMessages.join("\n"),
expireMs: 10000,
});
} else {
notifyError({
title: "Failed to get output",
message: error.message,
expireMs: 10000,
});
}
},
};
setRepetitions(repetitions);
const subscription = requestSubscription(environment, config);
subscriptions.push(subscription);
}
return () => {
for (const subscription of subscriptions) {
subscription.dispose();
}
};
} else {
const disposables: Disposable[] = [];
for (const instance of instances) {
const { activeRunId } = instance;
if (activeRunId === null) {
continue;
}
updateInstance({
instanceId: instance.id,
patch: { experimentId: null },
dirty: null,
});
// Initialize progress for this instance (non-streaming mode)
initExperimentRunProgress(instance.id, {
totalRuns,
runsCompleted: 0,
runsFailed: 0,
totalEvals,
evalsCompleted: 0,
evalsFailed: 0,
});
const variables = {
input: getChatCompletionOverDatasetInput({
credentials,
instanceId: instance.id,
playgroundStore,
datasetId,
splitIds,
evaluatorMappings,
}),
};
const disposable = generateChatCompletion({
variables,
onCompleted: onCompleted(instance.id),
onError(error) {
markPlaygroundInstanceComplete(instance.id);
const errorMessages = getErrorMessagesFromRelayMutationError(error);
if (errorMessages != null && errorMessages.length > 0) {
notifyError({
title: "Failed to get output",
message: errorMessages.join("\n"),
expireMs: 10000,
});
} else {
notifyError({
title: "Failed to get output",
message: error.message,
expireMs: 10000,
});
}
},
});
disposables.push(disposable);
}
return () => {
for (const disposable of disposables) {
disposable.dispose();
}
};
}
}, [
credentials,
datasetId,
splitIds,
environment,
evaluatorCount,
evaluatorMappings,
exampleCount,
generateChatCompletion,
hasSomeRunIds,
initExperimentRunProgress,
markPlaygroundInstanceComplete,
notifyError,
onCompleted,
onNext,
playgroundStore,
repetitions,
resetData,
setRepetitions,
]);
// Use useState + callback ref instead of useRef so that the component
// re-renders when the container element mounts (needed for virtualizer)
const [_tableContainerEl, setTableContainerEl] =
useState<HTMLDivElement | null>(null);
const tableContainerRef = useRef<HTMLDivElement | null>(null);
const tableContainerCallbackRef = useCallback((el: HTMLDivElement | null) => {
tableContainerRef.current = el;
setTableContainerEl(el);
}, []);
const { data, loadNext, hasNext, isLoadingNext } = usePaginationFragment<
PlaygroundDatasetExamplesTableRefetchQuery,
PlaygroundDatasetExamplesTableFragment$key
>(
graphql`
fragment PlaygroundDatasetExamplesTableFragment on Dataset
@refetchable(queryName: "PlaygroundDatasetExamplesTableRefetchQuery")
@argumentDefinitions(
datasetVersionId: { type: "ID" }
splitIds: { type: "[ID!]" }
after: { type: "String", defaultValue: null }
first: { type: "Int", defaultValue: 20 }
) {
examples(
datasetVersionId: $datasetVersionId
splitIds: $splitIds
first: $first
after: $after
) @connection(key: "PlaygroundDatasetExamplesTable_examples") {
edges {
example: node {
id
revision {
input
output
metadata
}
}
}
}
}
`,
dataset
);
// Refetch the data when the dataset version changes
const tableData = useMemo(
() =>
data.examples.edges.map((edge) => {
const example = edge.example;
const revision = example.revision;
return {
id: example.id,
input: revision.input,
output: revision.output,
metadata: revision.metadata,
};
}),
[data]
);
type TableRow = (typeof tableData)[number];
const exampleIds = useMemo(() => {
return tableData.map((row) => row.id);
}, [tableData]);
// Compute and cache available paths for template autocomplete when examples are loaded
// or when templateVariablesPath changes (which scopes the paths)
useEffect(() => {
if (tableData.length > 0) {
const examples = tableData.map((row) => ({
input: row.input,
reference: row.output,
metadata: row.metadata,
}));
const paths = extractPathsFromDatasetExamples(
examples,
templateVariablesPath,
MAX_EXAMPLES_FOR_PATH_EXTRACTION
);
setAvailablePaths({ availablePaths: paths, datasetId });
} else {
// Clear paths when dataset becomes empty to avoid stale autocomplete suggestions
setAvailablePaths({ availablePaths: [], datasetId });
}
}, [tableData, datasetId, templateVariablesPath, setAvailablePaths]);
// We assume that the experiments were run on the latest version of the dataset.
// This is subject to a race condition where a new dataset version is created after the playground experiments were run.
// We ignore this edge case for now.
const datasetVersionId = useMemo(() => {
return dataset.latestVersions?.edges[0].version.id ?? "";
}, [dataset.latestVersions?.edges]);
const playgroundInstanceOutputColumns = useMemo((): ColumnDef<TableRow>[] => {
return instances.map((instance, index) => {
const isRunning = instance.activeRunId !== null;
const experimentId = instance.experimentId;
return {
id: `instance-${instance.id}`,
header: () => (
<Flex direction="column" gap="size-50" width="100%">
<Flex
direction="row"
gap="size-100"
alignItems="center"
justifyContent="space-between"
width="100%"
>
<Flex direction="row" gap="size-100" alignItems="center">
<AlphabeticIndexIcon index={index} size="XS" />
<span>Output</span>
</Flex>
<PlaygroundInstanceProgressIndicator instanceId={instance.id} />
</Flex>
{isRunning ? (
<ExperimentCostAndLatencySummary executionState="running" />
) : experimentId != null ? (
<Suspense fallback={<ExperimentCostAndLatencySummarySkeleton />}>
<ConnectedExperimentCostAndLatencySummary
experimentId={experimentId}
/>
</Suspense>
) : (
<ExperimentCostAndLatencySummary executionState="idle" />
)}
{isRunning ? (
<ExperimentAnnotationAggregatesSkeleton
annotationConfigs={evaluatorOutputConfigs}
/>
) : experimentId != null ? (
<Suspense
fallback={
<ExperimentAnnotationAggregatesSkeleton
annotationConfigs={evaluatorOutputConfigs}
/>
}
>
<ConnectedExperimentAnnotationAggregates
experimentId={experimentId}
annotationConfigs={evaluatorOutputConfigs}
/>
</Suspense>
) : (
<ExperimentAnnotationAggregates
executionState="idle"
annotationConfigs={evaluatorOutputConfigs}
/>
)}
</Flex>
),
cell: ({ row }) => {
return (
<MemoizedExampleOutputCell
instanceId={instance.id}
exampleId={row.original.id}
isRunning={hasSomeRunIds}
datasetExample={{
input: row.original.input,
output: row.original.output,
metadata: row.original.metadata,
}}
templateVariablesPath={templateVariablesPath ?? null}
evaluatorOutputConfigs={evaluatorOutputConfigs}
onViewExperimentRunDetailsPress={() => {
setSelectedExampleIndex(row.index);
}}
onViewTracePress={(traceId, projectId, evaluatorName) => {
setSelectedTraceInfo({ traceId, projectId, evaluatorName });
}}
/>
);
},
size: 500,
};
});
}, [
hasSomeRunIds,
instances,
templateVariablesPath,
setSelectedExampleIndex,
evaluatorOutputConfigs,
]);
const columns: ColumnDef<TableRow>[] = useMemo(
() => [
{
header: "input",
accessorKey: "input",
cell: ({ row }) => (
<ExperimentInputCell
exampleId={row.original.id}
value={row.original.input}
height={CELL_PRIMARY_CONTENT_HEIGHT + annotationListHeight}
onExpand={() => {
setSearchParams((prev) => {
prev.set("exampleId", row.original.id);
return prev;
});
}}
/>
),
size: 200,
},
{
header: () => (
<Flex direction="column" gap="size-50">
<span>reference output</span>
<ExperimentCostAndLatencySummary
executionState="idle"
isPlaceholder={true}
/>
<ExperimentAnnotationAggregates
executionState="idle"
annotationConfigs={evaluatorOutputConfigs}
isPlaceholder={true}
/>
</Flex>
),
accessorKey: "output",
cell: ({ row }) => (
<ExperimentReferenceOutputCell
value={row.original.output}
height={CELL_PRIMARY_CONTENT_HEIGHT + annotationListHeight}
/>
),
size: 200,
},
...playgroundInstanceOutputColumns,
],
[
annotationListHeight,
evaluatorOutputConfigs,
playgroundInstanceOutputColumns,
setSearchParams,
]
);
const table = useReactTable<TableRow>({
columns,
data: tableData,
getCoreRowModel: getCoreRowModel(),
columnResizeMode: "onChange",
});
const rows = table.getRowModel().rows;
const isEmpty = rows.length === 0;
const estimatedRowHeight = calculateEstimatedRowHeight(numEnabledEvaluators);
const fetchMoreOnBottomReached = useCallback(
(containerRefElement?: HTMLDivElement | null) => {
if (containerRefElement) {
const { scrollHeight, scrollTop, clientHeight } = containerRefElement;
// once the user has scrolled within 300px of the bottom of the table, fetch more data if there is any
if (
scrollHeight - scrollTop - clientHeight < 300 &&
!isLoadingNext &&
hasNext
) {
loadNext(PAGE_SIZE);
}
}
},
[hasNext, isLoadingNext, loadNext]
);
/**
* Instead of calling `column.getSize()` on every render for every header
* and especially every data cell (very expensive),
* we will calculate all column sizes at once at the root table level in a useMemo
* and pass the column sizes down as CSS variables to the <table> element.
* @see https://tanstack.com/table/v8/docs/framework/react/examples/column-resizing-performant
*/
const columnSizeVars = useMemo(() => {
const headers = table.getFlatHeaders();
const colSizes: { [key: string]: number } = {};
for (let i = 0; i < headers.length; i++) {
const header = headers[i]!;
colSizes[`--header-${header.id}-size`] = header.getSize();
colSizes[`--col-${header.column.id}-size`] = header.column.getSize();
}
return colSizes;
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [
// eslint-disable-next-line react-hooks/exhaustive-deps
table.getState().columnSizingInfo,
// eslint-disable-next-line react-hooks/exhaustive-deps
table.getState().columnSizing,
columns.length,
]);
return (
<InstanceVariablesProvider>
<div
css={css`
flex: 1 1 auto;
overflow: auto;
height: 100%;
scrollbar-gutter: stable;
`}
ref={tableContainerCallbackRef}
onScroll={(e) => fetchMoreOnBottomReached(e.target as HTMLDivElement)}
>
<table
css={css(tableCSS, borderedTableCSS)}
style={{
...columnSizeVars,
width: table.getTotalSize(),
minWidth: "100%",
}}
>
<thead>
{table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}>
{headerGroup.headers.map((header) => (
<th
key={header.id}
style={{
width: `calc(var(--header-${header?.id}-size) * 1px)`,
}}
>
<div>
{flexRender(
header.column.columnDef.header,
header.getContext()
)}
</div>
<div
{...{
onMouseDown: header.getResizeHandler(),
onTouchStart: header.getResizeHandler(),
className: `resizer ${
header.column.getIsResizing() ? "isResizing" : ""
}`,
}}
/>
</th>
))}
</tr>
))}
</thead>
{isEmpty ? (
<TableEmpty />
) : table.getState().columnSizingInfo.isResizingColumn ? (
<MemoizedTableBody
table={table}
tableContainerRef={tableContainerRef}
estimatedRowHeight={estimatedRowHeight}
/>
) : (
<TableBody
table={table}
tableContainerRef={tableContainerRef}
estimatedRowHeight={estimatedRowHeight}
/>
)}
</table>
<ModalOverlay
isOpen={selectedExampleIndex !== null}
onOpenChange={(isOpen) => {
if (!isOpen) {
setSelectedExampleIndex(null);
}
}}
>
<Modal variant="slideover" size="fullscreen">
{selectedExampleIndex !== null &&
exampleIds[selectedExampleIndex] &&
baseExperimentId != null &&
isStringArray(compareExperimentIds) && (
<ExperimentCompareDetailsDialog
datasetId={datasetId}
datasetVersionId={datasetVersionId}
selectedExampleIndex={selectedExampleIndex}
selectedExampleId={exampleIds[selectedExampleIndex]}
baseExperimentId={baseExperimentId}
compareExperimentIds={compareExperimentIds}
exampleIds={exampleIds}
onExampleChange={(exampleIndex) => {
if (
exampleIndex === exampleIds.length - 1 &&
!isLoadingNext &&
hasNext
) {
loadNext(PAGE_SIZE);
}
if (exampleIndex >= 0 && exampleIndex < exampleIds.length) {
setSelectedExampleIndex(exampleIndex);
}
}}
openTraceDialog={(traceId, projectId) => {
setSelectedTraceInfo({ traceId, projectId });
}}
/>
)}
</Modal>
</ModalOverlay>
<ModalOverlay
isOpen={selectedTraceInfo !== null}
onOpenChange={(isOpen) => {
if (!isOpen) {
setSelectedTraceInfo(null);
setSearchParams(
(prev) => {
const newParams = new URLSearchParams(prev);
newParams.delete(SELECTED_SPAN_NODE_ID_PARAM);
return newParams;
},
{ replace: true }
);
}
}}
>
<Modal variant="slideover" size="fullscreen">
{selectedTraceInfo && (
<PlaygroundRunTraceDetailsDialog
traceId={selectedTraceInfo.traceId}
projectId={selectedTraceInfo.projectId}
title={
selectedTraceInfo.evaluatorName
? `Evaluator Trace: ${selectedTraceInfo.evaluatorName}`
: "Experiment Run Trace"
}
/>
)}
</Modal>
</ModalOverlay>
</div>
</InstanceVariablesProvider>
);
}
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
graphql`
subscription PlaygroundDatasetExamplesTableSubscription(
$input: ChatCompletionOverDatasetInput!
) {
chatCompletionOverDataset(input: $input) {
__typename
... on TextChunk {
content
datasetExampleId
repetitionNumber
}
... on ToolCallChunk {
id
datasetExampleId
repetitionNumber
function {
name
arguments
}
}
... on ChatCompletionSubscriptionExperiment {
experiment {
id
}
}
... on ChatCompletionSubscriptionResult {
datasetExampleId
repetitionNumber
span {
id
tokenCountTotal
costSummary {
total {
cost
}
}
latencyMs
project {
id
}
context {
traceId
}
}
experimentRun {
id
}
}
... on ChatCompletionSubscriptionError {
datasetExampleId
repetitionNumber
message
}
... on EvaluationChunk {
datasetExampleId
repetitionNumber
evaluatorName
experimentRunEvaluation {
id
name
label
score
annotatorKind
explanation
metadata
startTime
}
trace {
traceId
projectId
}
error
}
}
}
`;