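// NOTE: the lines below are web-page residue captured with this file; they are
// not part of the source (the applicable license is the header that follows).
// MCP Terminal Server
//
// MIT License
// Overview InspectNew Schema Related Servers Reviews Score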
/**
 * Copyright 2024 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import {
  afterEach,
  beforeEach,
  describe,
  expect,
  it,
  jest,
} from '@jest/globals';
import fs from 'fs';
import { toCsv, toJson } from '../../src/eval/exporter';
import { EvalResult, EvalRun, EvalRunKey } from '../../src/types/eval';

// Stub out the `crypto` module: `createHash` and `update` hand back their
// receiver so calls can be chained, and `digest` always yields the fixed
// string 'store-root'.
jest.mock('crypto', () => ({
  createHash: jest.fn().mockReturnThis(),
  update: jest.fn().mockReturnThis(),
  digest: jest.fn(() => 'store-root'),
}));

/**
 * Two-result fixture that backs EVAL_RUN.
 *
 * - The first result has structured (object) input, output and reference,
 *   plus non-empty context and trace id lists.
 * - The second result uses plain strings, empty context/trace id lists and
 *   no reference.
 *
 * Each result carries a scored `faithfulness` metric and an
 * `answer_relevancy` metric that only reports an error; the error strings
 * contain newlines and a comma respectively.
 */
const EVAL_RESULTS: EvalResult[] = [
  {
    testCaseId: 'alakjdshfalsdkjh',
    input: { subject: 'something', style: 'structured' },
    output: { output: 'output', other: 'other' },
    context: ['context1', 'context2'],
    metrics: [
      {
        evaluator: 'faithfulness',
        score: 0.5,
        rationale: 'somewhat faithful',
        traceId: '123',
        spanId: '456',
      },
      {
        evaluator: 'answer_relevancy',
        error: 'errored\nwith\nreturns\n',
      },
    ],
    traceIds: ['abc123', 'defhij'],
    reference: { structured: 'structured', output: 'output', other: 'other' },
  },
  {
    testCaseId: 'poqiweurqwepru',
    input: 'This is just a string',
    output: 'This is also just a string',
    context: [],
    metrics: [
      {
        evaluator: 'faithfulness',
        score: 0,
        rationale:
          'The provided context does not mention typical cat behaviors, so I cannot answer this question from the provided context.',
        traceId: '789',
        spanId: '101',
      },
      {
        evaluator: 'answer_relevancy',
        error: 'errored, with a comma',
      },
    ],
    traceIds: [],
  },
];

/**
 * Run key shared by every eval run constructed in this file.
 *
 * `createdAt` is computed when the module is loaded, so its value differs
 * between executions; none of the assertions visible in this file compare it.
 */
const EVAL_RUN_KEY: EvalRunKey = {
  actionRef: 'flow/myAwesomeFlow',
  evalRunId: 'abc1234',
  createdAt: new Date().toISOString(),
};

/** Complete eval run (shared key + EVAL_RESULTS) passed to `toJson` below. */
const EVAL_RUN: EvalRun = {
  key: EVAL_RUN_KEY,
  results: EVAL_RESULTS,
};

// Destination paths handed to the exporters. The suite below replaces
// `fs.promises.writeFile` with a mock and asserts on the path it receives,
// so these are used as expected arguments rather than as real files.
const CSV_OUTPUT_FILE = '/tmp/myAwesomeOutput.csv';
const JSON_OUTPUT_FILE = '/tmp/myAwesomeOutput.json';

/**
 * Tests for the eval-run exporters.
 *
 * Every test calls `toCsv` / `toJson` and then inspects the arguments that
 * reached the mocked `fs.promises.writeFile`; nothing is written to disk.
 *
 * NOTE(review): the exporters are invoked without `await`. If they are async
 * and perform the write only after an internal `await`, these assertions
 * would race with the write — confirm against the exporter implementation.
 */
describe('exporter', () => {
  beforeEach(() => {
    // Install the mock with `jest.spyOn` instead of assigning to
    // `fs.promises.writeFile` directly. `jest.restoreAllMocks()` only puts
    // back originals for spies, so a plain assignment would leave the real
    // `writeFile` permanently replaced once this suite has run.
    jest.spyOn(fs.promises, 'writeFile').mockResolvedValue(undefined);
  });

  afterEach(() => {
    // Restores the real `fs.promises.writeFile` captured by the spy above.
    jest.restoreAllMocks();
  });

  describe('toCsv', () => {
    // Header produced when `faithfulness` is the only evaluator and no
    // result has a `reference`.
    const faithfulnessOnlyHeader =
      'testCaseId,input,output,context,traceIds,faithfulness_score,faithfulness_rationale,faithfulness_error,faithfulness_traceId,faithfulness_spanId';

    /**
     * Asserts that the CSV written to CSV_OUTPUT_FILE is exactly the given
     * header line followed by the given record, separated by one newline.
     */
    const expectCsvWritten = (header: string, record: string): void => {
      expect(fs.promises.writeFile).toHaveBeenCalledWith(
        CSV_OUTPUT_FILE,
        `${header}\n${record}`
      );
    };

    it('should handle vanilla strings', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input',
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              score: 0.5,
              rationale: 'faithful',
              traceId: '123',
              spanId: '456',
            },
          ],
          traceIds: [],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      expectCsvWritten(
        faithfulnessOnlyHeader,
        'testCase1,input,output,[],[],0.5,faithful,,123,456'
      );
    });

    it('should unnest metrics', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input',
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              score: 0.5,
              rationale: 'faithful',
              traceId: '123',
              spanId: '456',
            },
            {
              evaluator: 'answer_relevancy',
              score: 1.0,
              rationale: 'relevant',
              traceId: '789',
              spanId: '101',
            },
          ],
          traceIds: [],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // Each evaluator contributes its own group of five columns.
      expectCsvWritten(
        'testCaseId,input,output,context,traceIds,faithfulness_score,faithfulness_rationale,faithfulness_error,faithfulness_traceId,faithfulness_spanId,answer_relevancy_score,answer_relevancy_rationale,answer_relevancy_error,answer_relevancy_traceId,answer_relevancy_spanId',
        'testCase1,input,output,[],[],0.5,faithful,,123,456,1,relevant,,789,101'
      );
    });

    it('should handle errors in metrics', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input',
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              error: 'This is an error!',
            },
          ],
          traceIds: [],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // Only the error column of the metric group is populated.
      expectCsvWritten(
        faithfulnessOnlyHeader,
        'testCase1,input,output,[],[],,,This is an error!,,'
      );
    });

    it('should stringify structured input', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: { subject: 'something', style: 'structured' },
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              score: 0.5,
              rationale: 'faithful',
              traceId: '123',
              spanId: '456',
            },
          ],
          traceIds: [],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // The object is emitted as JSON inside a quoted field, with every
      // embedded double quote doubled.
      expectCsvWritten(
        faithfulnessOnlyHeader,
        'testCase1,"{""subject"":""something"",""style"":""structured""}",output,[],[],0.5,faithful,,123,456'
      );
    });

    it('should handle carriage returns', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input',
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              error: 'errored\nwith\na\ncarriage return',
            },
          ],
          traceIds: [],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // The embedded newlines are kept verbatim inside a quoted field.
      expectCsvWritten(
        faithfulnessOnlyHeader,
        'testCase1,input,output,[],[],,,"errored\nwith\na\ncarriage return",,'
      );
    });

    it('should handle context and trace arrays', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input',
          output: 'output',
          context: ['context1', 'context2'],
          metrics: [
            {
              evaluator: 'faithfulness',
              score: 0.5,
              rationale: 'faithful',
              traceId: '123',
              spanId: '456',
            },
          ],
          traceIds: ['trace1', 'trace2'],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // Non-empty arrays are emitted as JSON inside quoted fields.
      expectCsvWritten(
        faithfulnessOnlyHeader,
        'testCase1,input,output,"[""context1"",""context2""]","[""trace1"",""trace2""]",0.5,faithful,,123,456'
      );
    });

    it('should handle commas in strings', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input, with, extra, commas',
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              score: 0.5,
              rationale: 'faithful',
              traceId: '123',
              spanId: '456',
            },
          ],
          traceIds: [],
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // A field containing the delimiter is wrapped in double quotes.
      expectCsvWritten(
        faithfulnessOnlyHeader,
        'testCase1,"input, with, extra, commas",output,[],[],0.5,faithful,,123,456'
      );
    });

    it('should include ground truth', () => {
      const evalResults: EvalResult[] = [
        {
          testCaseId: 'testCase1',
          input: 'input',
          output: 'output',
          context: [],
          metrics: [
            {
              evaluator: 'faithfulness',
              score: 0.5,
              rationale: 'faithful',
              traceId: '123',
              spanId: '456',
            },
          ],
          traceIds: [],
          reference: 'This is the honest truth',
        },
      ];

      toCsv({ key: EVAL_RUN_KEY, results: evalResults }, CSV_OUTPUT_FILE);

      // `reference` appears as its own column ahead of the metric columns.
      expectCsvWritten(
        'testCaseId,input,output,context,traceIds,reference,faithfulness_score,faithfulness_rationale,faithfulness_error,faithfulness_traceId,faithfulness_spanId',
        'testCase1,input,output,[],[],This is the honest truth,0.5,faithful,,123,456'
      );
    });
  });

  describe('toJson', () => {
    it('should write json string', () => {
      toJson(EVAL_RUN, JSON_OUTPUT_FILE);

      // Only the results array is expected in the file (not the run key),
      // pretty-printed with a two-space indent.
      expect(fs.promises.writeFile).toHaveBeenCalledWith(
        JSON_OUTPUT_FILE,
        JSON.stringify(EVAL_RESULTS, undefined, '  ')
      );
    });
  });
});