compare_models
Query multiple models in parallel with the same prompt and get a side-by-side comparison including latency and token counts.
Instructions
Query 2-5 models in parallel with the same prompt. Returns side-by-side comparison with latency and token metrics.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| models | Yes | List of model IDs to compare (2-5 models) | |
| prompt | Yes | The prompt to send to all models | |
| system_prompt | No | Optional system prompt for all models | |
| format | No | Response format — 'brief' for token-efficient summary, 'detailed' for full response | detailed |
| temperature | No | Sampling temperature (0-2) | |
| max_tokens | No | Maximum tokens per response | 1024 |
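For example, a minimal call might look like the sketch below, using the CompareModelsInput type exported from the schema file shown under Implementation Reference. The model IDs and prompt are illustrative placeholders, not values from the source.

```typescript
// Hypothetical input (model IDs are placeholders; use IDs your provider accepts):
const input: CompareModelsInput = {
  models: ["model-a", "model-b"],
  prompt: "Explain idempotency in one paragraph.",
  format: "brief",
  temperature: 0.2,
  max_tokens: 512,
};
```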
Implementation Reference
- src/tools/compare-models.ts:46-87 (handler): Main handler function for compare_models. Fans out to 2-5 models in parallel using Promise.allSettled, collects results with graceful degradation on failures, then formats the comparison as markdown with latency/token metrics.
```typescript
export async function compareModels(
  provider: Provider,
  input: CompareModelsInput
): Promise<string> {
  const startTime = Date.now();

  // Fan out to all models in parallel
  const results = await Promise.allSettled(
    input.models.map((model) =>
      provider.query(model, input.prompt, {
        system_prompt: input.system_prompt,
        temperature: input.temperature,
        max_tokens: input.max_tokens,
      })
    )
  );

  // Collect results, including failures
  const compared: CompareResult[] = results.map((result, i) => {
    if (result.status === "fulfilled") {
      return {
        model: input.models[i],
        content: result.value.content,
        latency_ms: result.value.latency_ms,
        tokens: result.value.usage?.total_tokens,
      };
    } else {
      return {
        model: input.models[i],
        content: "",
        latency_ms: 0,
        error:
          result.reason instanceof Error
            ? result.reason.message
            : String(result.reason),
      };
    }
  });

  const totalTime = Date.now() - startTime;
  return formatComparison(compared, totalTime, input.format ?? "detailed");
}
```
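The Provider type is not shown in this excerpt. Based purely on how the handler uses it, a compatible shape would be roughly the following sketch; the interface names and the usage field layout are inferred, not copied from the actual source.

```typescript
// Sketch of the Provider contract the handler relies on (inferred from usage):
interface QueryOptions {
  system_prompt?: string;
  temperature?: number;
  max_tokens?: number;
}

interface QueryResult {
  content: string;     // the model's response text
  latency_ms: number;  // per-model round-trip time
  usage?: { total_tokens: number };
}

interface Provider {
  query(model: string, prompt: string, options: QueryOptions): Promise<QueryResult>;
}
```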
- src/tools/compare-models.ts:19-36 (schema): Zod schema for compare_models input validation. Defines models (array of 2-5 strings), prompt (string), optional system_prompt, format (brief/detailed, default detailed), temperature (0-2), and max_tokens (default 1024).

```typescript
export const compareModelsSchema = z.object({
  models: z
    .array(z.string())
    .min(2)
    .max(5)
    .describe("List of model IDs to compare (2-5 models)"),
  prompt: z.string().describe("The prompt to send to all models"),
  system_prompt: z
    .string()
    .optional()
    .describe("Optional system prompt for all models"),
  format: z
    .enum(["brief", "detailed"])
    .optional()
    .default("detailed")
    .describe("Response format — 'brief' for token-efficient summary, 'detailed' for full response"),
  temperature: z.number().min(0).max(2).optional(),
  max_tokens: z.number().int().positive().optional().default(1024),
});

export type CompareModelsInput = z.infer<typeof compareModelsSchema>;
```
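Because format and max_tokens carry Zod defaults, parsing a minimal input fills them in automatically; a quick sketch (the prompt value is illustrative):

```typescript
// Defaults are applied at parse time, standard Zod behavior:
const parsed = compareModelsSchema.parse({
  models: ["model-a", "model-b"],
  prompt: "Compare TCP and UDP.",
});
// parsed.format === "detailed", parsed.max_tokens === 1024
```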
- src/server.ts:123-142 (registration): Registration of the compare_models tool on the MCP server. Calls server.tool() with the name, description, schema.shape, and a handler that invokes compareModels(provider, input) and returns the result.

```typescript
// --- compare_models ---
server.tool(
  "compare_models",
  "Query 2-5 models in parallel with the same prompt. Returns side-by-side comparison with latency and token metrics.",
  compareModelsSchema.shape,
  async (input) => {
    logger.info(`compare_models: querying ${input.models.join(", ")}`);
    try {
      const result = await compareModels(provider, input);
      return { content: [{ type: "text" as const, text: result }] };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      logger.error(`compare_models failed: ${message}`);
      return {
        content: [{ type: "text" as const, text: `Error: ${message}` }],
        isError: true,
      };
    }
  }
);
```
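On the client side, the tool is reached through a standard MCP tools/call request. A hypothetical sketch using the TypeScript SDK follows; client construction and transport setup are omitted, and the argument values are illustrative.

```typescript
// Assumes `client` is an already-connected @modelcontextprotocol/sdk Client.
const response = await client.callTool({
  name: "compare_models",
  arguments: {
    models: ["model-a", "model-b"],
    prompt: "Summarize the CAP theorem in one sentence.",
    format: "brief",
  },
});
```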
- src/tools/compare-models.ts:89-138 (helper): formatComparison() renders the comparison results as markdown. It separates successful and failed results, emits a summary table with latency/token metrics (marking the fastest model), then either brief (first 200 characters) or detailed content per model, plus error notes.

```typescript
function formatComparison(
  results: CompareResult[],
  totalTime: number,
  format: string
): string {
  const successful = results.filter((r) => !r.error);
  const failed = results.filter((r) => r.error);

  const lines: string[] = [
    `## Model Comparison (${results.length} models, ${totalTime}ms total)`,
    "",
  ];

  // Summary table
  if (successful.length > 0) {
    const fastest = successful.reduce((a, b) =>
      a.latency_ms < b.latency_ms ? a : b
    );
    lines.push("| Model | Latency | Tokens |");
    lines.push("|-------|---------|--------|");
    for (const r of successful) {
      const badge = r.model === fastest.model ? " fastest" : "";
      lines.push(
        `| ${r.model} | ${r.latency_ms}ms${badge} | ${r.tokens ?? "n/a"} |`
      );
    }
    lines.push("");
  }

  // Each model's response (brief = first 200 chars, detailed = full)
  for (const r of successful) {
    lines.push(`### ${r.model}`);
    lines.push("");
    if (format === "brief") {
      const summary = r.content.slice(0, 200);
      lines.push(summary + (r.content.length > 200 ? "..." : ""));
    } else {
      lines.push(r.content);
    }
    lines.push("");
  }

  // Failures
  if (failed.length > 0) {
    lines.push("### Errors");
    for (const r of failed) {
      lines.push(`- **${r.model}:** ${r.error}`);
    }
    lines.push("");
  }

  return lines.join("\n");
}
```
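With format "detailed", the returned markdown looks roughly like the following; the model names, timings, and token counts are invented purely to illustrate the shape of the output.

```markdown
## Model Comparison (2 models, 1834ms total)

| Model | Latency | Tokens |
|-------|---------|--------|
| model-a | 912ms fastest | 231 |
| model-b | 1834ms | 198 |

### model-a

(full response text)

### model-b

(full response text)
```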
- src/tools/compare-models.ts:38-44 (helper): CompareResult interface used internally to hold each model's result: model name, content, latency_ms, optional token count, and an optional error string.

```typescript
interface CompareResult {
  model: string;
  content: string;
  latency_ms: number;
  tokens?: number;
  error?: string;
}
```