// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ai
import (
"context"
"errors"
"fmt"
"testing"
"github.com/firebase/genkit/go/internal/registry"
)
// testEvalFunc is a per-testcase evaluator callback that marks every
// input as passing with a fixed score, echoing the request options back
// in the score details so tests can assert on them.
var testEvalFunc = func(ctx context.Context, req *EvaluatorCallbackRequest) (*EvaluatorCallbackResponse, error) {
	details := map[string]any{
		"reasoning": "No good reason",
		"options":   req.Options,
	}
	resp := &EvaluatorCallbackResponse{
		TestCaseId: req.Input.TestCaseId,
		Evaluation: []Score{
			{
				Id:      "testScore",
				Score:   1,
				Status:  ScoreStatusPass.String(),
				Details: details,
			},
		},
	}
	return resp, nil
}
// testBatchEvalFunc is a batch evaluator callback that scores an entire
// dataset in one call, producing one passing boolean score per datapoint.
// The reasoning string embeds the datapoint input so individual results
// are distinguishable in assertions.
//
// Fix: removed a leftover debug fmt.Printf that dumped every datapoint
// to stdout on each test run.
var testBatchEvalFunc = func(ctx context.Context, req *EvaluatorRequest) (*EvaluatorResponse, error) {
	var evalResponses []EvaluationResult
	for _, datapoint := range req.Dataset {
		details := map[string]any{
			"reasoning": fmt.Sprintf("batch of cookies, %s", datapoint.Input),
			"options":   req.Options,
		}
		score := Score{
			Id:      "testScore",
			Score:   true,
			Status:  ScoreStatusPass.String(),
			Details: details,
		}
		evalResponses = append(evalResponses, EvaluationResult{
			TestCaseId: datapoint.TestCaseId,
			Evaluation: []Score{score},
		})
	}
	return &evalResponses, nil
}
// testFailingEvalFunc is an evaluator callback that always errors,
// used to exercise the failure-handling path of the evaluator pipeline.
var testFailingEvalFunc = func(ctx context.Context, req *EvaluatorCallbackRequest) (*EvaluatorCallbackResponse, error) {
	return nil, errors.New("i give up")
}
// evalOpts supplies the shared evaluator metadata used by every test in
// this file; billing is disabled since nothing real is invoked.
var evalOpts = EvaluatorOptions{
	DisplayName: "Test Evaluator",
	Definition:  "Returns pass score for all",
	IsBilled:    false,
}
// dataset holds two minimal examples used as evaluator input.
var dataset = []*Example{
	{Input: "hello world"},
	{Input: "Foo bar"},
}
// testRequest bundles the shared dataset with a run ID and an options
// value that the evaluator callbacks echo back into score details.
var testRequest = EvaluatorRequest{
	Dataset:      dataset,
	EvaluationId: "testrun",
	Options:      "test-options",
}
// TestSimpleEvaluator verifies that a per-testcase evaluator runs over
// the whole dataset and that score fields and request options propagate
// into the response.
func TestSimpleEvaluator(t *testing.T) {
	reg := registry.New()
	evaluator := DefineEvaluator(reg, "test/testEvaluator", &evalOpts, testEvalFunc)
	resp, err := evaluator.Evaluate(context.Background(), &testRequest)
	if err != nil {
		t.Fatal(err)
	}
	results := *resp
	if got, want := len(results), 2; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	// All remaining checks target the first score of the first result.
	score := results[0].Evaluation[0]
	if got, want := score.Id, "testScore"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Score, 1; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Status, "PASS"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Details["options"], "test-options"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestOptionsRequired ensures both evaluator kinds can be defined with
// the shared options without panicking (definition only; nothing is run).
func TestOptionsRequired(t *testing.T) {
	reg := registry.New()
	_ = DefineEvaluator(reg, "test/testEvaluator", &evalOpts, testEvalFunc)
	_ = DefineBatchEvaluator(reg, "test/testBatchEvaluator", &evalOpts, testBatchEvalFunc)
}
// TestFailingEvaluator checks that a callback error is recorded on the
// score (Error set, status FAIL) instead of aborting the evaluation run.
func TestFailingEvaluator(t *testing.T) {
	reg := registry.New()
	evaluator := DefineEvaluator(reg, "test/testEvaluator", &evalOpts, testFailingEvalFunc)
	resp, err := evaluator.Evaluate(context.Background(), &testRequest)
	if err != nil {
		t.Fatal(err)
	}
	score := (*resp)[0].Evaluation[0]
	if got, dontWant := score.Error, ""; got == dontWant {
		t.Errorf("got %v, dontWant %v", got, dontWant)
	}
	if got, want := score.Status, "FAIL"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestLookupEvaluator verifies that both simple and batch evaluators are
// registered under their names and retrievable via LookupEvaluator.
func TestLookupEvaluator(t *testing.T) {
	reg := registry.New()
	DefineEvaluator(reg, "test/testEvaluator", &evalOpts, testEvalFunc)
	DefineBatchEvaluator(reg, "test/testBatchEvaluator", &evalOpts, testBatchEvalFunc)
	if got := LookupEvaluator(reg, "test/testEvaluator"); got == nil {
		t.Errorf("LookupEvaluator(r, \"test/testEvaluator\") is nil")
	}
	if got := LookupEvaluator(reg, "test/testBatchEvaluator"); got == nil {
		t.Errorf("LookupEvaluator(r, \"test/testBatchEvaluator\") is nil")
	}
}
// TestEvaluate exercises the top-level Evaluate helper driven by
// functional options, checking that the callback's score comes through.
func TestEvaluate(t *testing.T) {
	reg := registry.New()
	evaluator := DefineEvaluator(reg, "test/testEvaluator", &evalOpts, testEvalFunc)
	resp, err := Evaluate(context.Background(), reg,
		WithEvaluator(evaluator),
		WithDataset(dataset...),
		WithID("testrun"),
		WithConfig("test-options"))
	if err != nil {
		t.Fatal(err)
	}
	// All checks target the first score of the first result.
	score := (*resp)[0].Evaluation[0]
	if got, want := score.Id, "testScore"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Score, 1; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Status, "PASS"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Details["options"], "test-options"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestBatchEvaluator verifies the batch evaluator path: one result per
// datapoint, boolean score, and options echoed into details.
func TestBatchEvaluator(t *testing.T) {
	reg := registry.New()
	evaluator := DefineBatchEvaluator(reg, "test/testBatchEvaluator", &evalOpts, testBatchEvalFunc)
	resp, err := evaluator.Evaluate(context.Background(), &testRequest)
	if err != nil {
		t.Fatal(err)
	}
	results := *resp
	if got, want := len(results), 2; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	// All remaining checks target the first score of the first result.
	score := results[0].Evaluation[0]
	if got, want := score.Id, "testScore"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Score, true; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Status, "PASS"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
	if got, want := score.Details["options"], "test-options"; got != want {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestNewEvaluatorRef covers EvaluatorRef construction with and without
// a config payload, and its conformance to the EvaluatorArg interface.
func TestNewEvaluatorRef(t *testing.T) {
	t.Run("creates evaluator reference with name and config", func(t *testing.T) {
		cfg := map[string]any{"threshold": 0.8}
		ref := NewEvaluatorRef("test/myEvaluator", cfg)
		if got := ref.Name(); got != "test/myEvaluator" {
			t.Errorf("Name() = %q, want %q", got, "test/myEvaluator")
		}
		got := ref.Config()
		if got == nil {
			t.Error("Config() = nil, want config")
		}
		if got.(map[string]any)["threshold"] != 0.8 {
			t.Errorf("Config()[threshold] = %v, want 0.8", got.(map[string]any)["threshold"])
		}
	})
	t.Run("creates evaluator reference with nil config", func(t *testing.T) {
		ref := NewEvaluatorRef("test/simpleEvaluator", nil)
		if got := ref.Name(); got != "test/simpleEvaluator" {
			t.Errorf("Name() = %q, want %q", got, "test/simpleEvaluator")
		}
		if got := ref.Config(); got != nil {
			t.Errorf("Config() = %v, want nil", got)
		}
	})
	t.Run("implements EvaluatorArg interface", func(t *testing.T) {
		ref := NewEvaluatorRef("test/interface", nil)
		var _ EvaluatorArg = ref // compile-time check
		if got := ref.Name(); got != "test/interface" {
			t.Errorf("Name() = %q, want %q", got, "test/interface")
		}
	})
}
// TestEvaluatorRefUsedWithEvaluate verifies that when an EvaluatorRef
// carries a config and Evaluate gets no explicit WithConfig, the ref's
// config is what reaches the evaluator callback.
func TestEvaluatorRefUsedWithEvaluate(t *testing.T) {
	reg := registry.New()
	// Define evaluator that uses config
	callback := func(ctx context.Context, req *EvaluatorCallbackRequest) (*EvaluatorCallbackResponse, error) {
		return &EvaluatorCallbackResponse{
			TestCaseId: req.Input.TestCaseId,
			Evaluation: []Score{
				{
					Id:      "configScore",
					Score:   1,
					Status:  ScoreStatusPass.String(),
					Details: map[string]any{"options": req.Options},
				},
			},
		}, nil
	}
	DefineEvaluator(reg, "test/configEvaluator", &evalOpts, callback)
	// Use EvaluatorRef instead of direct evaluator
	ref := NewEvaluatorRef("test/configEvaluator", "ref-config-value")
	resp, err := Evaluate(context.Background(), reg,
		WithEvaluator(ref),
		WithDataset(&Example{Input: "test"}),
		WithID("testrun"))
	if err != nil {
		t.Fatal(err)
	}
	// Config from ref should be used since no explicit config was provided
	if got, want := (*resp)[0].Evaluation[0].Details["options"], "ref-config-value"; got != want {
		t.Errorf("got config %v, want %v", got, want)
	}
}
// TestScoreStatusString checks the String() rendering of each
// ScoreStatus value.
func TestScoreStatusString(t *testing.T) {
	// Map keys double as inputs; values are the expected renderings.
	// t.Run executes subtests synchronously, so iteration order and
	// loop-variable capture are both safe here.
	cases := map[ScoreStatus]string{
		ScoreStatusUnknown: "UNKNOWN",
		ScoreStatusFail:    "FAIL",
		ScoreStatusPass:    "PASS",
	}
	for status, want := range cases {
		t.Run(want, func(t *testing.T) {
			if got := status.String(); got != want {
				t.Errorf("String() = %q, want %q", got, want)
			}
		})
	}
}
// TestNewEvaluator covers the constructor's panic on an empty name and
// its tolerance of nil options.
func TestNewEvaluator(t *testing.T) {
	t.Run("panics with empty name", func(t *testing.T) {
		defer func() {
			if recover() == nil {
				t.Error("expected panic for empty name")
			}
		}()
		NewEvaluator("", &evalOpts, testEvalFunc)
	})
	t.Run("creates evaluator with nil options", func(t *testing.T) {
		evaluator := NewEvaluator("test/nilOpts", nil, testEvalFunc)
		if evaluator == nil {
			t.Error("NewEvaluator returned nil")
		}
		if got := evaluator.Name(); got != "test/nilOpts" {
			t.Errorf("Name() = %q, want %q", got, "test/nilOpts")
		}
	})
}
// TestNewBatchEvaluator covers the batch constructor's panic on an empty
// name and its tolerance of nil options.
func TestNewBatchEvaluator(t *testing.T) {
	t.Run("panics with empty name", func(t *testing.T) {
		defer func() {
			if recover() == nil {
				t.Error("expected panic for empty name")
			}
		}()
		NewBatchEvaluator("", &evalOpts, testBatchEvalFunc)
	})
	t.Run("creates batch evaluator with nil options", func(t *testing.T) {
		if evaluator := NewBatchEvaluator("test/batchNilOpts", nil, testBatchEvalFunc); evaluator == nil {
			t.Error("NewBatchEvaluator returned nil")
		}
	})
}
// TestEvaluateNilEvaluator covers the error paths of Evaluate: no
// evaluator supplied at all, and a reference to one never defined.
func TestEvaluateNilEvaluator(t *testing.T) {
	t.Run("returns error when evaluator not set", func(t *testing.T) {
		reg := registry.New()
		if _, err := Evaluate(context.Background(), reg,
			WithDataset(&Example{Input: "test"})); err == nil {
			t.Error("expected error when evaluator not set, got nil")
		}
	})
	t.Run("returns error for non-existent evaluator", func(t *testing.T) {
		reg := registry.New()
		missing := NewEvaluatorRef("test/nonexistent", nil)
		if _, err := Evaluate(context.Background(), reg,
			WithEvaluator(missing),
			WithDataset(&Example{Input: "test"})); err == nil {
			t.Error("expected error for non-existent evaluator, got nil")
		}
	})
}