Skip to main content
Glama

Sentry MCP

Official
by getsentry
search-events-agent.eval.ts (6.4 kB)
import { describeEval, ToolCallScorer } from "vitest-evals";
import { searchEventsAgent } from "@sentry/mcp-server/tools/search-events/agent";
import { SentryApiService } from "@sentry/mcp-server/api-client";
import { StructuredOutputScorer } from "./utils/structuredOutputScorer";
import "../setup-env";

// The shared MSW server is already started in setup-env.ts

/**
 * Eval suite for the search-events agent.
 *
 * Each case supplies a natural-language `input`, the tool calls the agent is
 * expected to make (`expectedTools`), and the structured query it is expected
 * to produce (`expected`). String values in `expected` are literal targets;
 * RegExp values list the alternative forms a case accepts.
 *
 * The `task` runs `searchEventsAgent` for each input and returns the
 * JSON-serialized result together with the tool calls it made, reshaped to
 * `{ name, arguments }`. The two scorers then grade that output.
 */
describeEval("search-events-agent", {
  data: async () => {
    return [
      {
        // Simple query with common fields - should NOT require tool calls
        input: "Show me all errors from today",
        expectedTools: [],
        expected: {
          dataset: "errors",
          query: "", // No filters, just time range
          sort: "-timestamp",
          timeRange: { statsPeriod: "24h" },
        },
      },
      {
        // Query with "me" reference - should only require whoami
        input: "Show me my errors from last week",
        expectedTools: [
          {
            name: "whoami",
            arguments: {},
          },
        ],
        expected: {
          dataset: "errors",
          query: /user\.email:test@example\.com|user\.id:123456/, // Can be either
          sort: "-timestamp",
          timeRange: { statsPeriod: "7d" },
        },
      },
      {
        // Common performance query - should NOT require tool calls
        input: "Show me slow API calls taking more than 1 second",
        expectedTools: [],
        expected: {
          dataset: "spans",
          query: /span\.duration:>1000|span\.duration:>1s/, // Can express as ms or seconds
          sort: "-span.duration",
        },
      },
      {
        // Query with OpenTelemetry attributes that need discovery
        input: "Show me LLM calls where temperature setting is above 0.7",
        expectedTools: [
          {
            name: "datasetAttributes",
            arguments: {
              dataset: "spans",
            },
          },
          {
            name: "otelSemantics",
            arguments: {
              namespace: "gen_ai",
              dataset: "spans",
            },
          },
        ],
        expected: {
          dataset: "spans",
          query: "gen_ai.request.temperature:>0.7",
          sort: "-span.duration",
        },
      },
      {
        // Query with custom field requiring discovery
        input: "Find errors with custom.payment.processor field",
        expectedTools: [
          {
            name: "datasetAttributes",
            arguments: {
              dataset: "errors",
            },
          },
        ],
        expected: {
          dataset: "errors",
          query: "has:custom.payment.processor",
          sort: "-timestamp",
        },
      },
      {
        // Query with custom field requiring discovery
        input: "Show me spans where custom.db.pool_size is greater than 10",
        expectedTools: [
          {
            name: "datasetAttributes",
            arguments: {
              dataset: "spans",
            },
          },
        ],
        expected: {
          dataset: "spans",
          query: "custom.db.pool_size:>10",
          sort: "-span.duration",
        },
      },
      {
        // Query requiring equation field calculation
        input: "How many total tokens did we consume yesterday",
        expectedTools: [
          {
            name: "datasetAttributes",
            arguments: {
              dataset: "spans",
            },
          },
          // Agent may find gen_ai fields and use them for calculation
        ],
        expected: {
          dataset: "spans",
          // For aggregations, query filter is optional - empty query gets all spans
          query: /^$|has:gen_ai\.usage\.(input_tokens|output_tokens)/,
          // Equation to sum both token types
          fields: [
            "equation|sum(gen_ai.usage.input_tokens) + sum(gen_ai.usage.output_tokens)",
          ],
          // Sort by the equation result in descending order
          sort: "-equation|sum(gen_ai.usage.input_tokens) + sum(gen_ai.usage.output_tokens)",
          timeRange: { statsPeriod: "24h" },
        },
      },
      {
        // Query that tests sort field self-correction
        // Agent should self-correct by adding count() to fields when sorting by it
        input: "Show me the top 10 most frequent error types",
        expectedTools: [],
        expected: {
          dataset: "errors",
          query: "", // No specific filter, just aggregate all errors
          // Agent should include count() in fields since we're sorting by it
          fields: ["error.type", "count()"],
          // Sort by count in descending order to get "most frequent"
          sort: "-count()",
          // timeRange can be null or have a default period
        },
      },
      {
        // Complex aggregate query that tests sort field self-correction
        // Agent should self-correct by including avg(span.duration) in fields
        input:
          "Show me database operations grouped by type, sorted by average duration",
        expectedTools: [
          {
            name: "datasetAttributes",
            arguments: {
              dataset: "spans",
            },
          },
        ],
        expected: {
          dataset: "spans",
          query: "has:db.operation",
          // Agent must include avg(span.duration) since we're sorting by it
          // Use db.operation as the grouping field (span.op is deprecated)
          fields: ["db.operation", "avg(span.duration)"],
          // Sort by average duration
          sort: "-avg(span.duration)",
          // timeRange is optional
        },
      },
    ];
  },
  task: async (input) => {
    // Create a real API service that will use MSW mocks
    const apiService = new SentryApiService({
      accessToken: "test-token",
    });

    const agentResult = await searchEventsAgent({
      query: input,
      organizationSlug: "sentry-mcp-evals",
      apiService,
    });

    return {
      // Serialized so the structured-output scorer receives a string result.
      result: JSON.stringify(agentResult.result),
      // Rename toolName/args to the name/arguments keys used in `expectedTools`.
      // NOTE(review): `any` is kept because the element type of
      // `agentResult.toolCalls` is not visible from this file - tighten it once
      // the agent's return type is confirmed.
      toolCalls: agentResult.toolCalls.map((call: any) => ({
        name: call.toolName,
        arguments: call.args,
      })),
    };
  },
  scorers: [
    ToolCallScorer(), // Validates tool calls
    StructuredOutputScorer({ match: "fuzzy" }), // Validates the structured query output with flexible matching
  ],
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/getsentry/sentry-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.