Skip to main content
Glama
grade.ts (3.68 kB)
import { MCPServerStdio, run, StreamedRunResult, withTrace } from '@openai/agents';

import { getAgent, log } from './base.js';
import { evaluationSchema } from './evaluationResult.js';

type GradeInput = {
  mcpServer: MCPServerStdio;
  model: string;
  prompt: string;
};

// Instructions for the tool-using agent that answers the user's query.
const agentSystemPrompt = `
You are an assistant responsible for evaluating the results of calling various tools.
Given the user's query, use the tools available to you to answer the question.`;

// Instructions for the evaluator agent that scores the first agent's answer.
const evalSystemPrompt = `
You are an expert evaluator assessing how well an LLM answers a given question.
Review the provided answer and score it from 1 to 5 in each of the following categories:

Accuracy - Does the answer contain factual errors or hallucinations?
Completeness - Does the answer fully address all parts of the question?
Relevance - Is the information directly related to the question?
Clarity - Is the explanation easy to understand and well-structured?
Reasoning - Does the answer show logical thinking or provide evidence or rationale?

Return your evaluation as a JSON object in the format:

{
  "accuracy": 1-5,
  "completeness": 1-5,
  "relevance": 1-5,
  "clarity": 1-5,
  "reasoning": 1-5,
  "comments": "A short paragraph summarizing the strengths and weaknesses of the answer."
}`;

/**
 * Streams `input` through `agent` inside a trace span named `traceName`,
 * mirroring the text stream to stdout when ENABLE_LOGGING=true, and resolves
 * with the stream once it has completed.
 *
 * Shared by `grade` and `promptAgent`, which previously duplicated this
 * sequence verbatim.
 */
async function runAgentStream(
  traceName: string,
  agent: Awaited<ReturnType<typeof getAgent>>,
  input: string,
): Promise<StreamedRunResult<any, any>> {
  return withTrace(traceName, async () => {
    const stream = await run(agent, input, { stream: true });
    if (process.env.ENABLE_LOGGING === 'true') {
      stream.toTextStream({ compatibleWithNodeStreams: true }).pipe(process.stdout);
    }
    await stream.completed;
    return stream;
  });
}

/**
 * Answers `prompt` with a tool-enabled agent, then has a second agent score
 * the answer on accuracy, completeness, relevance, clarity, and reasoning,
 * asserting each score is at least 4 via the test framework's global
 * `expect` (assumed provided by the test runner, e.g. vitest/jest — not
 * imported here).
 *
 * @returns the streamed result of the tool-using agent's run.
 * @throws Error if no JSON object can be extracted from the evaluator's
 *         output, or if the extracted JSON does not match `evaluationSchema`.
 */
export async function grade({
  mcpServer,
  model,
  prompt,
}: GradeInput): Promise<{ agentResult: StreamedRunResult<any, any> }> {
  const evals = await promptAgent({ mcpServer, model, prompt });
  log('\n');

  const evalAgentPrompt = `
Here is the user input: ${prompt}
Here is the LLM's answer: ${evals.finalOutput}`;

  const evalAgent = await getAgent({
    model,
    systemPrompt: evalSystemPrompt,
  });

  const result = await runAgentStream('run_eval_agent', evalAgent, evalAgentPrompt);
  log('\n');

  // Try the strict pattern first (stops at the first '}'), then fall back to
  // a greedy first-'{' to last-'}' match so a '}' inside the "comments"
  // string does not truncate the extracted JSON.
  const jsonRegexes = [/(?<JSON>\{[^}]+\})/, /(?<JSON>\{[\s\S]+\})/];
  for (const jsonRegex of jsonRegexes) {
    const match = result.finalOutput?.match(jsonRegex);
    if (!match) {
      continue;
    }

    // Guard JSON.parse: a malformed extraction should fall through to the
    // next pattern (or the descriptive error below), not surface as an
    // uncaught SyntaxError.
    let parsed: unknown;
    try {
      parsed = JSON.parse(match.groups?.JSON ?? '{}');
    } catch {
      continue;
    }

    const evaluationResult = evaluationSchema.safeParse(parsed);
    if (!evaluationResult.success) {
      throw new Error(
        `Could not parse agent output as an evaluation result:\n${result.finalOutput}`,
      );
    }

    const evaluation = evaluationResult.data;
    expect(evaluation.accuracy).toBeGreaterThanOrEqual(4);
    expect(evaluation.completeness).toBeGreaterThanOrEqual(4);
    expect(evaluation.relevance).toBeGreaterThanOrEqual(4);
    expect(evaluation.clarity).toBeGreaterThanOrEqual(4);
    expect(evaluation.reasoning).toBeGreaterThanOrEqual(4);

    return {
      agentResult: evals,
    };
  }

  throw new Error('Could not parse JSON from agent output');
}

/**
 * Runs `prompt` against an agent that has the MCP server's tools attached,
 * returning the completed stream.
 */
async function promptAgent({
  mcpServer,
  model,
  prompt,
}: GradeInput): Promise<StreamedRunResult<any, any>> {
  log(`Evaluating prompt: ${prompt}`, true);

  const agentWithTools = await getAgent({
    mcpServer,
    model,
    systemPrompt: agentSystemPrompt,
  });

  return runAgentStream('run_agent_with_tools', agentWithTools, prompt);
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/datalabs89/tableau-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.