HydraMCP

consensus.ts•10.6 KiB

/**
 * consensus — Ask multiple models, aggregate into a single answer.
 *
 * This is the "I need confidence" tool. Instead of getting 5 opinions
 * and reading them all, you get one answer with a confidence score.
 *
 * Strategy options (percentages are of the models that actually responded):
 * - majority: >50% of models agree
 * - supermajority: >=66% agree (for higher confidence)
 * - unanimous: 100% agree (for critical decisions)
 *
 * How "agreement" works:
 * We use a judge model to evaluate whether responses agree semantically.
 * One of the available models gets picked as a judge (or the user can
 * specify one). The judge reads all responses and groups them by
 * agreement. This is way better than keyword matching because it
 * understands that "start with a monolith" and "monolith, it's simpler"
 * are the same answer.
 *
 * Falls back to naive keyword matching if the judge call fails.
 */

import { z } from "zod";
import { Provider } from "../providers/provider.js";
import { logger } from "../utils/logger.js";

/** Input schema for the consensus tool. */
export const consensusSchema = z.object({
  models: z
    .array(z.string())
    .min(3)
    .max(7)
    .describe("List of model IDs to poll (3-7 models)"),
  prompt: z.string().describe("The prompt to send to all models"),
  strategy: z
    .enum(["majority", "supermajority", "unanimous"])
    .optional()
    .default("majority")
    .describe("Voting strategy — how many models must agree"),
  judge_model: z
    .string()
    .optional()
    .describe("Optional model ID to use as judge. Auto-picks if not specified."),
  system_prompt: z.string().optional(),
  temperature: z.number().min(0).max(2).optional().default(0),
  max_tokens: z.number().int().positive().optional().default(1024),
});

export type ConsensusInput = z.infer<typeof consensusSchema>;

type Strategy = "majority" | "supermajority" | "unanimous";

/** One polled model's outcome. `error` is set only when the query rejected. */
interface ModelVote {
  model: string;
  content: string;
  error?: string;
}

/** Votes split into the winning agreement group and everyone else. */
interface AgreementGroups {
  agreeing: ModelVote[];
  dissenting: ModelVote[];
}

/** Matches non-letter/non-digit characters at either edge of a token. */
const EDGE_PUNCTUATION = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;

/** Extracts a printable message from an unknown thrown or rejected value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Narrowing helper for values produced by JSON.parse. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Polls every model in `input.models` with the same prompt, groups the
 * successful responses by agreement (judge model first, keyword matching as
 * fallback) and renders a markdown report.
 *
 * @param provider - Backend used to query the polled models and the judge.
 * @param input - Validated tool input (see `consensusSchema`).
 * @returns Markdown describing whether consensus was reached, or a
 *   "Consensus Failed" message when fewer than two models responded.
 */
export async function consensus(
  provider: Provider,
  input: ConsensusInput
): Promise<string> {
  // Query all models in parallel; allSettled keeps one failure from sinking the poll.
  const results = await Promise.allSettled(
    input.models.map((model) =>
      provider.query(model, input.prompt, {
        system_prompt: input.system_prompt,
        temperature: input.temperature,
        max_tokens: input.max_tokens,
      })
    )
  );

  const votes: ModelVote[] = results.map((result, i) => {
    const model = input.models[i];
    if (result.status === "fulfilled") {
      return { model, content: result.value.content };
    }
    return {
      model,
      content: "",
      error: errorMessage(result.reason) || "unknown error",
    };
  });

  // Compare against undefined rather than truthiness so a rejection with an
  // empty message can never be mistaken for a successful (empty) vote.
  const successful = votes.filter((v) => v.error === undefined);
  const failed = votes.filter((v) => v.error !== undefined);

  if (successful.length < 2) {
    const errorList = failed.map((f) => `- ${f.model}: ${f.error}`).join("\n");
    return `## Consensus Failed\n\nOnly ${successful.length} model(s) responded. Need at least 2 for consensus.\n\nErrors:\n${errorList}`;
  }

  const strategy: Strategy = input.strategy ?? "majority";
  const requiredVotes = requiredVotesFor(strategy, successful.length);

  // An empty judge_model can never name a real model, so treat it as "not
  // specified" (hence `||` rather than `??`).
  const judgeModel =
    input.judge_model?.trim() || (await pickJudge(provider, input.models));

  let judged: AgreementGroups | null = null;
  let judgeLatency: number | undefined;

  if (judgeModel) {
    logger.info(`consensus: using ${judgeModel} as judge`);
    const judgeStart = Date.now();
    judged = await judgeAgreement(provider, judgeModel, successful);
    judgeLatency = Date.now() - judgeStart;
    if (!judged) {
      logger.warn("consensus: judge failed, falling back to keyword matching");
    }
  } else {
    logger.warn("consensus: no judge available, using keyword matching");
  }

  const { agreeing, dissenting } = judged ?? keywordFallback(successful);

  return formatConsensus({
    reached: agreeing.length >= requiredVotes,
    strategy,
    agreeing,
    dissenting,
    failed,
    requiredVotes,
    totalVoters: successful.length,
    judgeModel,
    judgeLatency,
    usedKeywordFallback: judged === null,
  });
}

/** Fraction of voters that must agree for the non-strict strategies. */
function getThreshold(strategy: Strategy): number {
  switch (strategy) {
    case "majority":
      return 0.5;
    case "supermajority":
      return 0.66;
    case "unanimous":
      return 1.0;
  }
}

/**
 * Number of agreeing votes needed for `strategy` among `totalVoters`.
 *
 * "majority" means strictly more than half, so it cannot be derived from
 * `ceil(n * 0.5)` (that would accept 1 of 2 or 2 of 4). The other strategies
 * are "at least threshold" and use the ceiling of the fraction.
 */
function requiredVotesFor(strategy: Strategy, totalVoters: number): number {
  if (strategy === "majority") {
    return Math.floor(totalVoters / 2) + 1;
  }
  return Math.ceil(totalVoters * getThreshold(strategy));
}

/**
 * Pick a judge model. Prefers a model not in the poll list so
 * there's no conflict of interest. Falls back to first available
 * if all models are in the poll.
 *
 * @returns The chosen model ID, or null when no model is available or the
 *   model listing fails.
 */
async function pickJudge(
  provider: Provider,
  polledModels: string[]
): Promise<string | null> {
  try {
    const available = await provider.listModels();
    if (available.length === 0) return null;

    // Match on both the full ID and the last path segment, case-insensitively.
    const polled = new Set(polledModels.map((m) => m.toLowerCase()));
    const outsider = available.find((candidate) => {
      const fullId = candidate.id.toLowerCase();
      const shortId = candidate.id.split("/").pop()?.toLowerCase() ?? "";
      return !polled.has(fullId) && !polled.has(shortId);
    });
    if (outsider) return outsider.id;

    // Everyone's in the poll. Just use the first available model.
    return available[0]?.id ?? null;
  } catch (err) {
    logger.warn(`consensus: could not list models for judge: ${errorMessage(err)}`);
    return null;
  }
}

/**
 * Validates the judge's parsed JSON and returns its largest group of response
 * indices (earliest group wins ties).
 *
 * Only integer indices in `[0, voteCount)` are kept, and each index may be
 * claimed by one group only, so duplicated or bogus indices cannot inflate
 * the agreement count.
 *
 * @returns The largest sanitized group (possibly empty), or null when the
 *   payload has no `groups` array.
 */
function largestValidGroup(parsed: unknown, voteCount: number): number[] | null {
  if (!isRecord(parsed)) return null;
  const rawGroups: unknown = parsed.groups;
  if (!Array.isArray(rawGroups)) return null;

  const claimed = new Set<number>();
  let largest: number[] = [];
  for (const rawGroup of rawGroups as unknown[]) {
    if (!Array.isArray(rawGroup)) continue;
    const group: number[] = [];
    for (const idx of rawGroup as unknown[]) {
      if (
        typeof idx === "number" &&
        Number.isInteger(idx) &&
        idx >= 0 &&
        idx < voteCount &&
        !claimed.has(idx)
      ) {
        claimed.add(idx);
        group.push(idx);
      }
    }
    if (group.length > largest.length) largest = group;
  }
  return largest;
}

/**
 * Ask a judge model to group responses by agreement.
 * Returns the largest agreement group as "agreeing" and the rest as "dissenting".
 *
 * @returns The grouping, or null when the judge call throws, its reply has no
 *   parseable JSON, or the JSON contains no usable group.
 */
async function judgeAgreement(
  provider: Provider,
  judgeModel: string,
  votes: ModelVote[]
): Promise<AgreementGroups | null> {
  // Labels are 0-indexed so they match the indices the judge is told to emit.
  const responseSummary = votes
    .map((v, i) => `Response ${i} (${v.model}):\n${v.content}`)
    .join("\n\n---\n\n");

  const judgePrompt = [
    "You are judging whether multiple AI model responses agree with each other.",
    "",
    `Here are ${votes.length} responses to the same question:`,
    "",
    responseSummary,
    "",
    "Do these responses fundamentally agree on the same answer/position, even if they use different words or go into different levels of detail?",
    "",
    "Reply with ONLY valid JSON in this exact format, no other text:",
    '{"groups": [[0, 1, 2]], "reasoning": "all three say the same thing"}',
    "",
    "Rules:",
    "- Each group is an array of response numbers (0-indexed)",
    "- Responses that agree go in the same group",
    "- If all responses agree, put them all in one group like [[0, 1, 2]]",
    "- If there are two camps, make two groups like [[0, 1], [2]]",
    "- Focus on the substance of the answer, not the wording",
    '- "reasoning" should be one short sentence',
  ].join("\n");

  try {
    const result = await provider.query(judgeModel, judgePrompt, {
      temperature: 0,
      max_tokens: 256,
    });

    // The judge may wrap the JSON in prose; grab the outermost {...} span.
    const jsonMatch = result.content.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      logger.warn("consensus judge: no JSON found in response");
      return null;
    }

    const parsed: unknown = JSON.parse(jsonMatch[0]);
    const largest = largestValidGroup(parsed, votes.length);
    if (largest === null) {
      logger.warn("consensus judge: invalid groups format");
      return null;
    }
    if (largest.length === 0) {
      logger.warn("consensus judge: no usable groups in response");
      return null;
    }

    const winners = new Set(largest);
    const agreeing = votes.filter((_, i) => winners.has(i));
    const dissenting = votes.filter((_, i) => !winners.has(i));

    const reasoning =
      isRecord(parsed) && typeof parsed.reasoning === "string"
        ? parsed.reasoning
        : "";
    logger.info(
      `consensus judge: ${agreeing.length}/${votes.length} agree. ${reasoning}`
    );

    return { agreeing, dissenting };
  } catch (err) {
    logger.warn(`consensus judge failed: ${errorMessage(err)}`);
    return null;
  }
}

/**
 * Keyword-based fallback when no judge model is available.
 * Naive but better than nothing.
 *
 * Every vote is tried as the baseline and the baseline that gathers the most
 * keyword-matching votes wins (earliest baseline on ties), so an outlier in
 * first position cannot hide a real majority. The winning baseline is placed
 * first in `agreeing`.
 */
function keywordFallback(votes: ModelVote[]): AgreementGroups {
  let best: AgreementGroups = { agreeing: [], dissenting: [] };

  for (const baseline of votes) {
    const agreeing: ModelVote[] = [baseline];
    const dissenting: ModelVote[] = [];
    for (const vote of votes) {
      if (vote === baseline) continue;
      if (responsesAgreeByKeywords(baseline.content, vote.content)) {
        agreeing.push(vote);
      } else {
        dissenting.push(vote);
      }
    }
    if (agreeing.length > best.agreeing.length) {
      best = { agreeing, dissenting };
    }
  }

  return best;
}

/**
 * Lower-cased words longer than 4 characters, with punctuation trimmed from
 * both edges so "monolith," and "monolith" count as the same word.
 */
function significantWords(text: string): Set<string> {
  return new Set(
    text
      .toLowerCase()
      .split(/\s+/)
      .map((w) => w.replace(EDGE_PUNCTUATION, ""))
      .filter((w) => w.length > 4)
  );
}

/**
 * Crude similarity test: shared significant words divided by the larger
 * vocabulary must exceed 0.3. Returns false if either side has no
 * significant words.
 */
function responsesAgreeByKeywords(a: string, b: string): boolean {
  const wordsA = significantWords(a);
  const wordsB = significantWords(b);

  if (wordsA.size === 0 || wordsB.size === 0) return false;

  let overlap = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) overlap++;
  }

  const similarity = overlap / Math.max(wordsA.size, wordsB.size);
  return similarity > 0.3;
}

/** Everything `formatConsensus` needs to render the report. */
interface ConsensusResult {
  reached: boolean;
  strategy: string;
  agreeing: ModelVote[];
  dissenting: ModelVote[];
  failed: ModelVote[];
  requiredVotes: number;
  totalVoters: number;
  judgeModel?: string | null;
  judgeLatency?: number;
  /** True when the grouping came from keyword matching, not from the judge. */
  usedKeywordFallback?: boolean;
}

/** Single-line excerpt of `text`, truncated to `maxLength` chars plus "...". */
function excerpt(text: string, maxLength: number): string {
  const flat = text.replace(/\n/g, " ");
  return flat.length > maxLength ? `${flat.slice(0, maxLength)}...` : flat;
}

/** Renders the "**Judge:** ..." line, or null when there is nothing to say. */
function judgeLine(result: ConsensusResult): string | null {
  if (result.usedKeywordFallback) {
    const why = result.judgeModel
      ? `${result.judgeModel} failed`
      : "none available";
    return `**Judge:** ${why} — used keyword matching`;
  }
  if (!result.judgeModel) return null;
  // Explicit undefined check so a 0 ms latency is still reported.
  const latency =
    result.judgeLatency !== undefined ? ` (${result.judgeLatency}ms)` : "";
  return `**Judge:** ${result.judgeModel}${latency}`;
}

/**
 * Renders the consensus outcome as markdown: verdict header, strategy and
 * agreement counts, the first agreeing response as the consensus answer,
 * short excerpts of every response, dissenting views, and a failure count.
 */
function formatConsensus(result: ConsensusResult): string {
  const confidence =
    result.totalVoters > 0
      ? Math.round((result.agreeing.length / result.totalVoters) * 100)
      : 0;

  const lines: string[] = [
    `## Consensus: ${result.reached ? "REACHED" : "NOT REACHED"}`,
    "",
    `**Strategy:** ${result.strategy} (needed ${result.requiredVotes}/${result.totalVoters})`,
    `**Agreement:** ${result.agreeing.length}/${result.totalVoters} models (${confidence}%)`,
  ];
  const judge = judgeLine(result);
  if (judge !== null) lines.push(judge);
  lines.push("");

  // Show the consensus answer (first agreeing model's response)
  const representative = result.agreeing[0];
  if (representative !== undefined) {
    lines.push(
      "### Consensus Response",
      "",
      representative.content,
      "",
      `*Agreed by: ${result.agreeing.map((v) => v.model).join(", ")}*`,
      ""
    );
  }

  // Show what each model actually said so the judge can be sanity-checked
  const allVotes = [...result.agreeing, ...result.dissenting];
  if (allVotes.length > 1) {
    lines.push("### Individual Responses");
    for (const v of allVotes) {
      lines.push(`- **${v.model}:** ${excerpt(v.content, 150)}`);
    }
    lines.push("");
  }

  // Show dissent (flattened to one line each so the list stays well-formed)
  if (result.dissenting.length > 0) {
    lines.push("### Dissenting Views");
    for (const d of result.dissenting) {
      lines.push(`- **${d.model}:** ${excerpt(d.content, 200)}`);
    }
    lines.push("");
  }

  // Show failures
  if (result.failed.length > 0) {
    lines.push(`*${result.failed.length} model(s) failed to respond*`);
  }

  return lines.join("\n");
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Pickle-Pixel/HydraMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

consensus.ts•10.6 KiB