/**
* Debate Orchestrator
*
* Manages the multi-model debate process for all tool types (plan, opinion, review)
* using the Strategy pattern to customize behavior per tool type.
*/
import { performance } from "perf_hooks";
import {
DebateOptions,
DebateResult,
DebateConfig,
DebateMeta,
DebateWarning,
DebateLog,
ToolType,
} from "../types/public";
import { getStrategy } from "../strategies/registry";
import { getAvailableModels, sendToModel } from "../modelManager";
import { getToolConfig, getModelById } from "../modelConfig";
// Type for notification function passed from MCP
export type NotificationFn = (notification: {
level: "info" | "debug" | "warning" | "error";
data: string;
}) => Promise<void>;
/**
* Create a mapping between model names and anonymous IDs
*/
function createModelMapping(models: string[]): {
idToModel: Record<string, string>;
modelToId: Record<string, string>;
} {
const idToModel: Record<string, string> = {};
const modelToId: Record<string, string> = {};
// Use letters A, B, C, etc. as model IDs
const ids = ["A", "B", "C", "D", "E", "F", "G", "H"];
models.forEach((model, index) => {
if (index < ids.length) {
const id = ids[index];
idToModel[id] = model;
modelToId[model] = id;
}
});
return { idToModel, modelToId };
}
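// Example (model names are illustrative, not required values):
//   createModelMapping(["gpt-4o", "gemini-1.5-pro"]) →
//     idToModel: { A: "gpt-4o", B: "gemini-1.5-pro" }
//     modelToId: { "gpt-4o": "A", "gemini-1.5-pro": "B" }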
/**
* Create a token budget manager
*/
function createTokenBudget(limit: number) {
let usedTokens = 0;
return {
getStatus: () => ({
limit,
used: usedTokens,
remaining: Math.max(0, limit - usedTokens),
}),
beginRound: (estimatedTokens: number) => {
return usedTokens + estimatedTokens <= limit;
},
recordUsage: (tokens: number) => {
usedTokens += tokens;
},
};
}
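// Illustrative usage (the limit and costs are arbitrary numbers):
//   const budget = createTokenBudget(10000);
//   budget.beginRound(4000); // → true (4000 fits within the 10000 limit)
//   budget.recordUsage(3500);
//   budget.getStatus(); // → { limit: 10000, used: 3500, remaining: 6500 }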
/**
* Chunk a large array into smaller batches for parallel processing
*/
function chunkArray<T>(array: T[], size: number): T[][] {
const result: T[][] = [];
for (let i = 0; i < array.length; i += size) {
result.push(array.slice(i, i + size));
}
return result;
}
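// e.g. chunkArray([1, 2, 3, 4, 5], 2) → [[1, 2], [3, 4], [5]]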
/**
* Extract a confidence score from the judge's response
*/
function extractConfidenceScore(text: string): number {
const match = text.match(/Confidence Score:\s*(0\.\d+|1\.0|1)/i);
if (match && match[1]) {
return parseFloat(match[1]);
}
return 0.5; // Default mid-level confidence
}
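// e.g. extractConfidenceScore("Confidence Score: 0.85") → 0.85;
// text with no recognizable score falls back to 0.5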
/**
* Model response types
*/
interface ModelResponse {
text: string;
tokenUsage: {
prompt: number;
completion: number;
total: number;
};
}
/**
* Convert string response from sendToModel to ModelResponse
*/
function parseModelResponse(
response: string,
promptLength: number,
): ModelResponse {
// Estimate token counts based on text length
const promptTokens = Math.ceil(promptLength / 4);
const completionTokens = Math.ceil(response.length / 4);
return {
text: response,
tokenUsage: {
prompt: promptTokens,
completion: completionTokens,
total: promptTokens + completionTokens,
},
};
}
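// The ~4 characters-per-token heuristic is only an estimate, not
// provider-reported usage; e.g. a 400-char prompt with a 200-char response
// yields { prompt: 100, completion: 50, total: 150 }.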
/**
* Main orchestration function for running debates
*/
export async function runDebate(
options: DebateOptions,
sendNotification: NotificationFn,
): Promise<DebateResult> {
// 1. Merge & normalize config
const strategy = await getStrategy(options.toolType);
if (!strategy) {
throw new Error(`No strategy available for tool type ${options.toolType}`);
}
// Combine configs with defaults
const config: Required<DebateConfig> = {
enabled: options.debate ?? options.debateConfig?.enabled ?? false,
rounds:
options.debateConfig?.rounds ?? strategy.configDefaults?.rounds ?? 3,
strategy: options.debateConfig?.strategy ?? options.toolType,
maxTotalTokens: options.debateConfig?.maxTotalTokens ?? 0,
logLevel:
options.debateConfig?.logLevel ??
strategy.configDefaults?.logLevel ??
"warn",
};
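  // Precedence example: { debate: true, debateConfig: { enabled: false } }
  // resolves to enabled === true, because options.debate is checked first.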
  // Short-circuit if debate is disabled. Non-debate (single-model) inference
  // is handled by the caller, so reaching this point with debate disabled is
  // a usage error.
  if (!config.enabled) {
    await sendNotification({
      level: "warning",
      data: `Debate is disabled for ${options.toolType}; runDebate requires debate to be enabled.`,
    });
    throw new Error("Debate must be enabled to use runDebate function");
  }
  // 2. Initialize tracking data
const startTime = performance.now();
const phaseTimings: Record<string, number> = {};
const warnings: DebateWarning[] = [];
const transcript: string[] = [];
const fallbacks: { phase: string; reason: string }[] = [];
let totalPromptTokens = 0;
let totalCompletionTokens = 0;
// Helper to track phase timings
const timePhase = async <T>(
phaseName: string,
func: () => Promise<T>,
): Promise<T> => {
const phaseStart = performance.now();
try {
return await func();
} finally {
phaseTimings[phaseName] = performance.now() - phaseStart;
}
};
  // Helper to record warnings (the notification is fire-and-forget by design)
  const addWarning = (
    code: DebateWarning["code"],
    message: string,
    phase: DebateWarning["phase"],
  ) => {
    warnings.push({ code, message, phase });
    void sendNotification({
      level: "warning",
      data: `[${code}] ${message}`,
    });
  };
  // Helper to record fallbacks (debug-only, fire-and-forget notification)
  const addFallback = (phase: string, reason: string) => {
    fallbacks.push({ phase, reason });
    if (config.logLevel === "debug") {
      void sendNotification({
        level: "debug",
        data: `Fallback in ${phase}: ${reason}`,
      });
    }
  };
// Helper to record transcript entries (only in debug mode)
const addTranscript = (entry: string) => {
if (config.logLevel === "debug") {
transcript.push(entry);
}
};
// Determine available models based on tool type
  const availableModels = getAvailableModels(options.toolType).filter(
    (m) => m.available,
  );
if (availableModels.length === 0) {
throw new Error(
"No models available. Please set either OPENAI_API_KEY or GEMINI_API_KEY environment variables.",
);
}
// Select models for the debate (for now, just use all available models)
const debateModels = availableModels.map((m) => m.name);
const modelMapping = createModelMapping(debateModels);
const { idToModel, modelToId } = modelMapping;
// Set up token budget if specified
const tokenBudget =
config.maxTotalTokens > 0 ? createTokenBudget(config.maxTotalTokens) : null;
// Set up the debate context
const debateContext = {
userPrompt: options.userPrompt,
candidates: [] as string[],
critiques: [] as string[],
round: 1,
};
// Track which model created each candidate
const candidateModelMapping: {
candidateIndex: number;
modelId: string;
modelName: string;
}[] = [];
await sendNotification({
level: "info",
data: `Starting debate for ${options.toolType} with ${debateModels.length} models and ${config.rounds} rounds`,
});
let finalOutput = "";
let judgeResult: any = null;
// 3. Single-Model Shortcut
if (debateModels.length === 1) {
await sendNotification({
level: "info",
data: "Only one model available. Using simplified single-model flow.",
});
// Just generate a single candidate and validate it
const modelId = Object.keys(idToModel)[0];
const modelName = idToModel[modelId];
const generatePrompt = strategy.getPrompt("generate", debateContext);
// Record the prompt in the transcript
addTranscript(`[SINGLE-MODEL GENERATE]\nPrompt:\n${generatePrompt}\n`);
try {
// Generate the candidate
const response = await timePhase<ModelResponse>("generate", async () => {
const rawResponse = await sendToModel(
generatePrompt,
{
modelName,
modelType: modelName.includes("gemini") ? "gemini" : "openai",
          tokenCount: Math.ceil(generatePrompt.length / 4),
},
sendNotification,
);
return parseModelResponse(rawResponse, generatePrompt.length);
});
// Record token usage
totalPromptTokens += response.tokenUsage.prompt;
totalCompletionTokens += response.tokenUsage.completion;
if (tokenBudget) {
tokenBudget.recordUsage(response.tokenUsage.total);
}
// Record the response in the transcript
addTranscript(`[SINGLE-MODEL RESPONSE]\n${response.text}\n`);
// Use the response as final output
finalOutput = response.text;
} catch (error) {
// Handle generation failure
addWarning(
"GEN_FAIL",
`Generation failed: ${error instanceof Error ? error.message : String(error)}`,
"generate",
);
// No candidates to choose from, so return error message
finalOutput = `Error generating output: ${error instanceof Error ? error.message : String(error)}`;
}
} else {
// 4. Multi-Model Debate Loop
for (let round = 1; round <= config.rounds; round++) {
await sendNotification({
level: "info",
data: `Starting debate round ${round}/${config.rounds}`,
});
// Update debate context for this round
debateContext.round = round;
      // Check the token budget before starting the round (50000 tokens is a
      // rough per-round estimate)
      if (tokenBudget && !tokenBudget.beginRound(50000)) {
addWarning(
"TOKEN_BUDGET",
"Token budget exceeded. Ending debate early.",
"generate",
);
break;
}
      // a. Generation phase (candidates are generated only in round 1)
if (round === 1) {
await sendNotification({
level: "info",
data: "Generation phase: Creating initial candidates...",
});
const modelIds = Object.keys(idToModel);
const generationTasks = modelIds.map((modelId) => {
const modelName = idToModel[modelId];
const modelType = modelName.includes("gemini") ? "gemini" : "openai";
return async () => {
const generatePrompt = strategy.getPrompt("generate", {
...debateContext,
round: parseInt(modelId, 36) || round, // Use the model ID as a numeric identifier
});
// Record the prompt in the transcript
addTranscript(
`[GENERATE MODEL ${modelId}]\nPrompt:\n${generatePrompt}\n`,
);
try {
// Check if we're in test mode with API calls skipped
if (process.env.SKIP_API_CALLS === "true") {
// Create a mock response for testing
const mockResponse = `Mock ${modelName} response for phase: generate\nModel: ${modelName}\nPrompt length: ${generatePrompt.length}\nThis is test output only.`;
const response = parseModelResponse(
mockResponse,
generatePrompt.length,
);
// Record token usage (mocked)
totalPromptTokens += response.tokenUsage.prompt;
totalCompletionTokens += response.tokenUsage.completion;
if (tokenBudget) {
tokenBudget.recordUsage(response.tokenUsage.total);
}
// Record the mock response in the transcript
addTranscript(
`[GENERATE RESPONSE MODEL ${modelId} (MOCK)]\n${response.text}\n`,
);
// Add delay to simulate API call
await new Promise((resolve) => setTimeout(resolve, 500));
// Store the mock candidate
return response.text;
} else {
// Make actual API call
const rawResponse = await sendToModel(
generatePrompt,
{
modelName,
modelType,
                    tokenCount: Math.ceil(generatePrompt.length / 4),
},
sendNotification,
);
const response = parseModelResponse(
rawResponse,
generatePrompt.length,
);
// Record token usage
totalPromptTokens += response.tokenUsage.prompt;
totalCompletionTokens += response.tokenUsage.completion;
if (tokenBudget) {
tokenBudget.recordUsage(response.tokenUsage.total);
}
// Record the response in the transcript
addTranscript(
`[GENERATE RESPONSE MODEL ${modelId}]\n${response.text}\n`,
);
// Store the candidate
return response.text;
}
} catch (error) {
// If generation fails, log but continue with other models
addWarning(
"GEN_FAIL",
`Generation failed for model ${modelId}: ${error instanceof Error ? error.message : String(error)}`,
"generate",
);
return null;
}
};
});
// Run generation tasks in parallel with a parallelism of 3
const batches = chunkArray(generationTasks, 3);
const candidateResults: (string | null)[] = [];
await timePhase("generate", async () => {
for (const batch of batches) {
const batchResults = await Promise.all(batch.map((task) => task()));
candidateResults.push(...batchResults);
}
});
// Filter out null results and track which model created each candidate
let candidateIndex = 0;
const availableModelIds = Object.keys(idToModel);
debateContext.candidates = [];
candidateModelMapping.length = 0; // Clear any existing mappings
candidateResults.forEach((result, resultIndex) => {
if (result !== null) {
debateContext.candidates.push(result);
const modelId = availableModelIds[resultIndex];
const modelName = idToModel[modelId];
candidateModelMapping.push({
candidateIndex,
modelId,
modelName,
});
candidateIndex++;
}
});
if (debateContext.candidates.length === 0) {
throw new Error(
"All model generations failed. Cannot continue debate.",
);
}
}
// Skip critique on the final round
if (round < config.rounds) {
// b. Critique phase
await sendNotification({
level: "info",
data: "Critique phase: Evaluating candidates...",
});
if (config.logLevel === "debug") {
await sendNotification({
level: "debug",
data: `Starting critique phase with candidates: ${debateContext.candidates.length}`,
});
}
const modelIds = Object.keys(idToModel);
if (config.logLevel === "debug") {
await sendNotification({
level: "debug",
data: `Model IDs for critique: ${modelIds.join(", ")}`,
});
}
const critiqueTasks = modelIds.map((modelId) => {
const modelName = idToModel[modelId];
const modelType = modelName.includes("gemini") ? "gemini" : "openai";
return async () => {
            // For critique, each model critiques all candidates
            const critiquePrompt = strategy.getPrompt("critique", {
              ...debateContext,
              // Same per-model numeric seed trick as in the generation phase
              round: parseInt(modelId, 36) || round,
            });
// Record the prompt in the transcript
addTranscript(
`[CRITIQUE MODEL ${modelId}]\nPrompt:\n${critiquePrompt}\n`,
);
try {
// Check if we're in test mode with API calls skipped
if (process.env.SKIP_API_CALLS === "true") {
// Create a mock response for testing
const mockResponse = `Mock ${modelName} response for phase: critique\nModel: ${modelName}\nPrompt length: ${critiquePrompt.length}\nThis is test output only.`;
const response = parseModelResponse(
mockResponse,
critiquePrompt.length,
);
// Record token usage (mocked)
totalPromptTokens += response.tokenUsage.prompt;
totalCompletionTokens += response.tokenUsage.completion;
if (tokenBudget) {
tokenBudget.recordUsage(response.tokenUsage.total);
}
// Record the mock response in the transcript
addTranscript(
`[CRITIQUE RESPONSE MODEL ${modelId} (MOCK)]\n${response.text}\n`,
);
// Add delay to simulate API call
await new Promise((resolve) => setTimeout(resolve, 500));
// Store the mock critique
return response.text;
} else {
                // Make actual API call
                const rawResponse = await sendToModel(
                  critiquePrompt,
                  {
                    modelName,
                    modelType,
                    tokenCount: Math.ceil(critiquePrompt.length / 4),
                  },
                  sendNotification,
                );
                const response = parseModelResponse(
                  rawResponse,
                  critiquePrompt.length,
                );
// Record token usage
totalPromptTokens += response.tokenUsage.prompt;
totalCompletionTokens += response.tokenUsage.completion;
if (tokenBudget) {
tokenBudget.recordUsage(response.tokenUsage.total);
}
// Record the response in the transcript
addTranscript(
`[CRITIQUE RESPONSE MODEL ${modelId}]\n${response.text}\n`,
);
// Store the critique
return response.text;
}
} catch (error) {
// If critique fails, log but continue with other models
addWarning(
"GEN_FAIL",
`Critique failed for model ${modelId}: ${error instanceof Error ? error.message : String(error)}`,
"critique",
);
return null;
}
};
});
// Run critique tasks in parallel with a parallelism of 3
const batches = chunkArray(critiqueTasks, 3);
const critiqueResults: (string | null)[] = [];
await timePhase("critique", async () => {
for (const batch of batches) {
const batchResults = await Promise.all(batch.map((task) => task()));
critiqueResults.push(...batchResults);
}
});
// Filter out null results (failed critiques)
debateContext.critiques = critiqueResults.filter(
(r): r is string => r !== null,
);
}
}
    // c. Judge phase: runs once, after all debate rounds complete
await sendNotification({
level: "info",
data: "Judge phase: Selecting the best candidate...",
});
// Get judge model from configuration
const toolConfig = getToolConfig(options.toolType);
const judgeModel = getModelById(toolConfig.judgeModel);
    // Check that we have an API key for the configured judge model
    const hasJudgeKey =
      !!judgeModel &&
      ((judgeModel.type === "anthropic" && !!process.env.ANTHROPIC_API_KEY) ||
        (judgeModel.type === "openai" && !!process.env.OPENAI_API_KEY) ||
        (judgeModel.type === "gemini" && !!process.env.GEMINI_API_KEY));
    // Fall back to the first debate model if the judge is unavailable
    const judgeModelName =
      hasJudgeKey && judgeModel ? judgeModel.name : debateModels[0];
    const judgeModelType =
      hasJudgeKey && judgeModel ? judgeModel.type : availableModels[0].type;
// Create the judge prompt
const judgePrompt = strategy.getPrompt("judge", debateContext);
// Record the prompt in the transcript
addTranscript(`[JUDGE]\nPrompt:\n${judgePrompt}\n`);
try {
// Get the judge's decision
const judgeResponse = await timePhase<ModelResponse>(
"judge",
async () => {
// Check if we're in test mode with API calls skipped
if (process.env.SKIP_API_CALLS === "true") {
// Create a mock response for testing
const mockResponse = `Mock ${judgeModelName} response for phase: judge
Winner: Model A's plan
Confidence Score: 0.8
Rationale: This is a mock judge response for testing purposes. In a real debate, this would contain the judge's rationale for selecting the winning candidate.`;
// Add delay to simulate API call
await new Promise((resolve) => setTimeout(resolve, 500));
return parseModelResponse(mockResponse, judgePrompt.length);
} else {
// Make actual API call
const rawResponse = await sendToModel(
judgePrompt,
{
modelName: judgeModelName,
modelType: judgeModelType,
              tokenCount: Math.ceil(judgePrompt.length / 4),
},
sendNotification,
);
return parseModelResponse(rawResponse, judgePrompt.length);
}
},
);
// Record token usage
totalPromptTokens += judgeResponse.tokenUsage.prompt;
totalCompletionTokens += judgeResponse.tokenUsage.completion;
if (tokenBudget) {
tokenBudget.recordUsage(judgeResponse.tokenUsage.total);
}
// Record the response in the transcript
if (process.env.SKIP_API_CALLS === "true") {
addTranscript(`[JUDGE RESPONSE (MOCK)]\n${judgeResponse.text}\n`);
} else {
addTranscript(`[JUDGE RESPONSE]\n${judgeResponse.text}\n`);
}
// Parse the judge's decision
judgeResult = strategy.parseJudge(
judgeResponse.text,
debateContext.candidates,
);
if (judgeResult.success) {
if (judgeResult.winnerIdx === -1) {
// Judge provided its own synthesis
finalOutput = judgeResponse.text;
} else {
// Judge selected a winner from the candidates
finalOutput = debateContext.candidates[judgeResult.winnerIdx];
}
} else {
// Judge failed to determine a winner
addWarning("JUDGE_MALFORMED", judgeResult.error, "judge");
addFallback("judge", judgeResult.error);
// Fallback to the first candidate
finalOutput = debateContext.candidates[0];
}
} catch (error) {
// If judge phase fails, fallback to the first candidate
addWarning(
"JUDGE_MALFORMED",
`Judge phase error: ${error instanceof Error ? error.message : String(error)}`,
"judge",
);
addFallback(
"judge",
`Error: ${error instanceof Error ? error.message : String(error)}`,
);
// Use the first candidate as fallback
finalOutput = debateContext.candidates[0];
}
}
  // 5. Prepare the result metadata
const meta: DebateMeta = {
warnings,
tokenUsage: {
prompt: totalPromptTokens,
completion: totalCompletionTokens,
},
timings: {
totalMs: performance.now() - startTime,
perPhase: phaseTimings,
},
strategy: strategy.toolType,
rounds: debateContext.round,
};
  // Add winner information if we have a successful judge result
  if (config.logLevel === "debug") {
    await sendNotification({
      level: "debug",
      data: `Judge result: exists=${!!judgeResult}, success=${judgeResult?.success}, winnerIdx=${judgeResult?.winnerIdx}`,
    });
  }
  if (judgeResult && judgeResult.success && judgeResult.winnerIdx >= 0) {
    if (config.logLevel === "debug") {
      await sendNotification({
        level: "debug",
        data: `Looking for winner at index ${judgeResult.winnerIdx} in mapping: ${JSON.stringify(candidateModelMapping)}`,
      });
    }
    const winnerMapping = candidateModelMapping.find(
      (m) => m.candidateIndex === judgeResult.winnerIdx,
    );
    if (winnerMapping) {
      meta.winner = {
        modelId: winnerMapping.modelId,
        modelName: winnerMapping.modelName,
      };
      await sendNotification({
        level: "info",
        data: `Debate winner: ${winnerMapping.modelName} (model ${winnerMapping.modelId})`,
      });
    } else if (config.logLevel === "debug") {
      await sendNotification({
        level: "debug",
        data: `No winner mapping found for index ${judgeResult.winnerIdx}`,
      });
    }
  }
  // 6. Return the appropriate result shape based on the tool type
const result: DebateResult = {
toolType: options.toolType,
meta,
} as DebateResult;
// Include debug logs if requested
if (config.logLevel === "debug") {
(result as any).debateLog = {
transcript,
fallbacks,
} as DebateLog;
}
  // Add the tool-specific result field
  switch (options.toolType) {
    case ToolType.Plan:
      // `plan` is assumed to mirror the `opinion`/`review` field pattern
      (result as any).plan = finalOutput;
      break;
    case ToolType.Opinion:
      (result as any).opinion = finalOutput;
      break;
    case ToolType.Review:
      (result as any).review = finalOutput;
      break;
  }
  // 7. Send telemetry if enabled
if (process.env.SAGE_TELEMETRY === "1") {
try {
// Simple anonymous telemetry logging
console.info(
JSON.stringify({
event: "debate_completion",
toolType: options.toolType,
rounds: debateContext.round,
warnings: warnings.length,
tokens: totalPromptTokens + totalCompletionTokens,
durationMs: meta.timings.totalMs,
modelsCount: debateModels.length,
}),
);
} catch (error) {
// Ignore telemetry errors
}
}
await sendNotification({
level: "info",
data: `Debate completed for ${options.toolType} in ${Math.round(meta.timings.totalMs)}ms with ${warnings.length} warnings.`,
});
return result;
}
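// Minimal usage sketch. The options object below shows only the fields that
// runDebate actually reads above; a real DebateOptions may require more, and
// the prompt text is purely illustrative:
//
//   const result = await runDebate(
//     {
//       toolType: ToolType.Review,
//       userPrompt: "Review this diff for correctness...",
//       debate: true,
//       debateConfig: { rounds: 2, logLevel: "debug" },
//     } as DebateOptions,
//     async (n) => console.error(`[${n.level}] ${n.data}`),
//   );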