Skip to main content
Glama

Genkit MCP

Official
by firebase
evaluator.ts12.3 kB
/** * Copyright 2024 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import { action, z, type Action } from '@genkit-ai/core'; import { logger } from '@genkit-ai/core/logging'; import type { Registry } from '@genkit-ai/core/registry'; import { toJsonSchema } from '@genkit-ai/core/schema'; import { SPAN_TYPE_ATTR, runInNewSpan } from '@genkit-ai/core/tracing'; import { randomUUID } from 'crypto'; export const ATTR_PREFIX = 'genkit'; export const SPAN_STATE_ATTR = ATTR_PREFIX + ':state'; export const BaseDataPointSchema = z.object({ input: z.unknown(), output: z.unknown().optional(), context: z.array(z.unknown()).optional(), reference: z.unknown().optional(), testCaseId: z.string().optional(), traceIds: z.array(z.string()).optional(), }); // DataPoint that is to be used for actions. This needs testCaseId to be present. export const BaseEvalDataPointSchema = BaseDataPointSchema.extend({ testCaseId: z.string(), }); export type BaseEvalDataPoint = z.infer<typeof BaseEvalDataPointSchema>; const EvalStatusEnumSchema = z.enum(['UNKNOWN', 'PASS', 'FAIL']); /** Enum that indicates if an evaluation has passed or failed */ export enum EvalStatusEnum { UNKNOWN = 'UNKNOWN', PASS = 'PASS', FAIL = 'FAIL', } export const ScoreSchema = z.object({ id: z .string() .describe( 'Optional ID to differentiate different scores if applying in a single evaluation' ) .optional(), score: z.union([z.number(), z.string(), z.boolean()]).optional(), status: EvalStatusEnumSchema.optional(), error: z.string().optional(), details: z .object({ reasoning: z.string().optional(), }) .passthrough() .optional(), }); // Update genkit-tools/src/utils/evals.ts if you change this value export const EVALUATOR_METADATA_KEY_DISPLAY_NAME = 'evaluatorDisplayName'; export const EVALUATOR_METADATA_KEY_DEFINITION = 'evaluatorDefinition'; export const EVALUATOR_METADATA_KEY_IS_BILLED = 'evaluatorIsBilled'; export type Score = z.infer<typeof ScoreSchema>; export type BaseDataPoint = z.infer<typeof BaseDataPointSchema>; export type Dataset< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, > = Array<z.infer<DataPoint>>; export const EvalResponseSchema = z.object({ sampleIndex: z.number().optional(), testCaseId: z.string(), traceId: z.string().optional(), spanId: z.string().optional(), evaluation: z.union([ScoreSchema, z.array(ScoreSchema)]), }); export type EvalResponse = z.infer<typeof EvalResponseSchema>; export const EvalResponsesSchema = z.array(EvalResponseSchema); export type EvalResponses = z.infer<typeof EvalResponsesSchema>; export type EvaluatorFn< EvalDataPoint extends typeof BaseEvalDataPointSchema = typeof BaseEvalDataPointSchema, CustomOptions extends z.ZodTypeAny = z.ZodTypeAny, > = ( input: z.infer<EvalDataPoint>, evaluatorOptions?: z.infer<CustomOptions> ) => Promise<EvalResponse>; export type EvaluatorAction< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, CustomOptions extends z.ZodTypeAny = z.ZodTypeAny, > = Action<typeof EvalRequestSchema, typeof EvalResponsesSchema> & { __dataPointType?: DataPoint; __configSchema?: CustomOptions; }; function withMetadata< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, CustomOptions extends z.ZodTypeAny = z.ZodTypeAny, >( evaluator: Action<typeof EvalRequestSchema, typeof EvalResponsesSchema>, dataPointType?: DataPoint, configSchema?: CustomOptions ): EvaluatorAction<DataPoint, CustomOptions> { const withMeta = evaluator as EvaluatorAction<DataPoint, CustomOptions>; withMeta.__dataPointType = dataPointType; withMeta.__configSchema = configSchema; return withMeta; } const EvalRequestSchema = z.object({ dataset: z.array(BaseDataPointSchema), evalRunId: z.string(), options: z.unknown(), }); export interface EvaluatorParams< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, CustomOptions extends z.ZodTypeAny = z.ZodTypeAny, > { evaluator: EvaluatorArgument<DataPoint, CustomOptions>; dataset: Dataset<DataPoint>; evalRunId?: string; options?: z.infer<CustomOptions>; } export interface EvaluatorOptions< DataPoint extends typeof BaseDataPointSchema, EvaluatorOpts extends z.ZodTypeAny, > { name: string; displayName: string; definition: string; dataPointType?: DataPoint; configSchema?: EvaluatorOpts; isBilled?: boolean; } /** * Creates evaluator action for the provided {@link EvaluatorFn} implementation. */ export function defineEvaluator< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, EvalDataPoint extends typeof BaseEvalDataPointSchema = typeof BaseEvalDataPointSchema, EvaluatorOpts extends z.ZodTypeAny = z.ZodTypeAny, >( registry: Registry, options: EvaluatorOptions<DataPoint, EvaluatorOpts>, runner: EvaluatorFn<EvalDataPoint, EvaluatorOpts> ): EvaluatorAction { const e = evaluator(options, runner); registry.registerAction('evaluator', e); return e; } /** * Creates evaluator action for the provided {@link EvaluatorFn} implementation. */ export function evaluator< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, EvalDataPoint extends typeof BaseEvalDataPointSchema = typeof BaseEvalDataPointSchema, EvaluatorOpts extends z.ZodTypeAny = z.ZodTypeAny, >( options: EvaluatorOptions<DataPoint, EvaluatorOpts>, runner: EvaluatorFn<EvalDataPoint, EvaluatorOpts> ): EvaluatorAction { const evalMetadata = {}; evalMetadata[EVALUATOR_METADATA_KEY_IS_BILLED] = options.isBilled == undefined ? true : options.isBilled; evalMetadata[EVALUATOR_METADATA_KEY_DISPLAY_NAME] = options.displayName; evalMetadata[EVALUATOR_METADATA_KEY_DEFINITION] = options.definition; if (options.configSchema) { evalMetadata['customOptions'] = toJsonSchema({ schema: options.configSchema, }); } const evaluator = action( { actionType: 'evaluator', name: options.name, inputSchema: EvalRequestSchema.extend({ dataset: options.dataPointType ? z.array(options.dataPointType) : z.array(BaseDataPointSchema), options: options.configSchema ?? z.unknown(), evalRunId: z.string(), batchSize: z.number().optional(), }), outputSchema: EvalResponsesSchema, metadata: { type: 'evaluator', evaluator: evalMetadata, }, }, async (i) => { const evalResponses: EvalResponses = []; // This also populates missing testCaseIds const batches = getBatchedArray(i.dataset, i.batchSize); for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) { const batch = batches[batchIndex]; try { await runInNewSpan( { metadata: { name: i.batchSize ? `Batch ${batchIndex}` : `Test Case ${batch[0].testCaseId}`, metadata: { 'evaluator:evalRunId': i.evalRunId }, }, labels: { [SPAN_TYPE_ATTR]: 'evaluator', }, }, async (metadata, otSpan) => { const spanId = otSpan.spanContext().spanId; const traceId = otSpan.spanContext().traceId; const evalRunPromises = batch.map((d, index) => { const sampleIndex = i.batchSize ? i.batchSize * batchIndex + index : batchIndex; const datapoint = d as BaseEvalDataPoint; metadata.input = { input: datapoint.input, output: datapoint.output, context: datapoint.context, }; const evalOutputPromise = runner(datapoint, i.options) .then((result) => ({ ...result, traceId, spanId, sampleIndex, })) .catch((error) => { return { sampleIndex, spanId, traceId, testCaseId: datapoint.testCaseId, evaluation: { error: `Evaluation of test case ${datapoint.testCaseId} failed: \n${error}`, }, }; }); return evalOutputPromise; }); const allResults = await Promise.all(evalRunPromises); metadata.output = allResults.length === 1 ? allResults[0] : allResults; allResults.map((result) => { evalResponses.push(result); }); } ); } catch (e) { logger.error( `Evaluation of batch ${batchIndex} failed: \n${(e as Error).stack}` ); continue; } } return evalResponses; } ); const ewm = withMetadata( evaluator as any as Action< typeof EvalRequestSchema, typeof EvalResponsesSchema >, options.dataPointType, options.configSchema ); return ewm; } export type EvaluatorArgument< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, CustomOptions extends z.ZodTypeAny = z.ZodTypeAny, > = | string | EvaluatorAction<DataPoint, CustomOptions> | EvaluatorReference<CustomOptions>; /** * A veneer for interacting with evaluators. */ export async function evaluate< DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema, CustomOptions extends z.ZodTypeAny = z.ZodTypeAny, >( registry: Registry, params: EvaluatorParams<DataPoint, CustomOptions> ): Promise<EvalResponses> { let evaluator: EvaluatorAction<DataPoint, CustomOptions>; if (typeof params.evaluator === 'string') { evaluator = await registry.lookupAction(`/evaluator/${params.evaluator}`); } else if (Object.hasOwnProperty.call(params.evaluator, 'info')) { evaluator = await registry.lookupAction( `/evaluator/${params.evaluator.name}` ); } else { evaluator = params.evaluator as EvaluatorAction<DataPoint, CustomOptions>; } if (!evaluator) { throw new Error('Unable to utilize the provided evaluator'); } return (await evaluator({ dataset: params.dataset, options: params.options, evalRunId: params.evalRunId ?? randomUUID(), })) as EvalResponses; } export const EvaluatorInfoSchema = z.object({ /** Friendly label for this evaluator */ label: z.string().optional(), metrics: z.array(z.string()), }); export type EvaluatorInfo = z.infer<typeof EvaluatorInfoSchema>; export interface EvaluatorReference<CustomOptions extends z.ZodTypeAny> { name: string; configSchema?: CustomOptions; info?: EvaluatorInfo; } /** * Helper method to configure a {@link EvaluatorReference} to a plugin. */ export function evaluatorRef< CustomOptionsSchema extends z.ZodTypeAny = z.ZodTypeAny, >( options: EvaluatorReference<CustomOptionsSchema> ): EvaluatorReference<CustomOptionsSchema> { return { ...options }; } /** * Helper method to generated batched array. Also ensures each testCase has a * testCaseId */ function getBatchedArray<T extends { testCaseId?: string }>( arr: T[], batchSize?: number ): T[][] { let size: number; if (!batchSize) { size = 1; } else { size = batchSize; } const batches: T[][] = []; for (var i = 0; i < arr.length; i += size) { batches.push( arr.slice(i, i + size).map((d) => ({ ...d, testCaseId: d.testCaseId ?? randomUUID(), })) ); } return batches; }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/firebase/genkit'

If you have feedback or need assistance with the MCP directory API, please join our Discord server