debateOrchestrator.ts (26.1 kB)
/**
 * Debate Orchestrator
 *
 * Manages the multi-model debate process for all tool types (plan, opinion, review)
 * using the Strategy pattern to customize behavior per tool type.
 */
import { performance } from "perf_hooks";
import {
  DebateOptions,
  DebateResult,
  DebateConfig,
  DebateMeta,
  DebateWarning,
  DebateLog,
  ToolType,
} from "../types/public";
import { DebateStrategy, DebatePhase } from "../strategies/strategyTypes";
import { getStrategy } from "../strategies/registry";
import {
  selectModelBasedOnTokens,
  getAvailableModels,
  sendToModel,
} from "../modelManager";
import { getToolConfig, getModelById } from "../modelConfig";

// Type for notification function passed from MCP
export type NotificationFn = (notification: {
  level: "info" | "debug" | "warning" | "error";
  data: string;
}) => Promise<void>;

/**
 * Create a mapping between model names and anonymous IDs
 */
function createModelMapping(models: string[]): {
  idToModel: Record<string, string>;
  modelToId: Record<string, string>;
} {
  const idToModel: Record<string, string> = {};
  const modelToId: Record<string, string> = {};

  // Use letters A, B, C, etc. as model IDs
  const ids = ["A", "B", "C", "D", "E", "F", "G", "H"];

  models.forEach((model, index) => {
    if (index < ids.length) {
      const id = ids[index];
      idToModel[id] = model;
      modelToId[model] = id;
    }
  });

  return { idToModel, modelToId };
}

/**
 * Create a token budget manager
 */
function createTokenBudget(limit: number) {
  let usedTokens = 0;

  return {
    getStatus: () => ({
      limit,
      used: usedTokens,
      remaining: Math.max(0, limit - usedTokens),
    }),
    beginRound: (estimatedTokens: number) => {
      return usedTokens + estimatedTokens <= limit;
    },
    recordUsage: (tokens: number) => {
      usedTokens += tokens;
    },
  };
}

/**
 * Chunk a large array into smaller batches for parallel processing
 */
function chunkArray<T>(array: T[], size: number): T[][] {
  const result: T[][] = [];
  for (let i = 0; i < array.length; i += size) {
    result.push(array.slice(i, i + size));
  }
  return result;
}

/**
 * Extract a confidence score from the judge's response
 */
function extractConfidenceScore(text: string): number {
  const match = text.match(/Confidence Score:\s*(0\.\d+|1\.0|1)/i);
  if (match && match[1]) {
    return parseFloat(match[1]);
  }
  return 0.5; // Default mid-level confidence
}

/**
 * Model response types
 */
interface ModelResponse {
  text: string;
  tokenUsage: {
    prompt: number;
    completion: number;
    total: number;
  };
}

/**
 * Convert string response from sendToModel to ModelResponse
 */
function parseModelResponse(
  response: string,
  promptLength: number,
): ModelResponse {
  // Estimate token counts based on text length
  const promptTokens = Math.ceil(promptLength / 4);
  const completionTokens = Math.ceil(response.length / 4);

  return {
    text: response,
    tokenUsage: {
      prompt: promptTokens,
      completion: completionTokens,
      total: promptTokens + completionTokens,
    },
  };
}

/**
 * Main orchestration function for running debates
 */
export async function runDebate(
  options: DebateOptions,
  sendNotification: NotificationFn,
): Promise<DebateResult> {
  // 1. Merge & normalize config
  const strategy = await getStrategy(options.toolType);
  if (!strategy) {
    throw new Error(`No strategy available for tool type ${options.toolType}`);
  }

  // Combine configs with defaults
  const config: Required<DebateConfig> = {
    enabled: options.debate ?? options.debateConfig?.enabled ?? false,
    rounds:
      options.debateConfig?.rounds ?? strategy.configDefaults?.rounds ?? 3,
    strategy: options.debateConfig?.strategy ?? options.toolType,
    maxTotalTokens: options.debateConfig?.maxTotalTokens ?? 0,
    logLevel:
      options.debateConfig?.logLevel ??
      strategy.configDefaults?.logLevel ??
      "warn",
  };

  // Short-circuit if debate is disabled
  if (!config.enabled) {
    await sendNotification({
      level: "info",
      data: `Debate disabled for ${options.toolType}. Using single-model inference.`,
    });

    // Return basic result without debate
    // This should be handled elsewhere - in this case, we'll just return an error
    throw new Error("Debate must be enabled to use runDebate function");
  }

  // Initialize tracking data
  const startTime = performance.now();
  const phaseTimings: Record<string, number> = {};
  const warnings: DebateWarning[] = [];
  const transcript: string[] = [];
  const fallbacks: { phase: string; reason: string }[] = [];
  let totalPromptTokens = 0;
  let totalCompletionTokens = 0;

  // Helper to track phase timings
  const timePhase = async <T>(
    phaseName: string,
    func: () => Promise<T>,
  ): Promise<T> => {
    const phaseStart = performance.now();
    try {
      return await func();
    } finally {
      phaseTimings[phaseName] = performance.now() - phaseStart;
    }
  };

  // Helper to record warnings
  const addWarning = (
    code: DebateWarning["code"],
    message: string,
    phase: DebateWarning["phase"],
  ) => {
    warnings.push({ code, message, phase });
    sendNotification({
      level: "warning",
      data: `[${code}] ${message}`,
    });
  };

  // Helper to record fallbacks
  const addFallback = (phase: string, reason: string) => {
    fallbacks.push({ phase, reason });
    if (config.logLevel === "debug") {
      sendNotification({
        level: "debug",
        data: `Fallback in ${phase}: ${reason}`,
      });
    }
  };

  // Helper to record transcript entries (only in debug mode)
  const addTranscript = (entry: string) => {
    if (config.logLevel === "debug") {
      transcript.push(entry);
    }
  };

  // Determine available models based on tool type
  const availableModels = getAvailableModels(options.toolType).filter(
    (m) => m.available,
  );

  if (availableModels.length === 0) {
    throw new Error(
      "No models available. Please set either OPENAI_API_KEY or GEMINI_API_KEY environment variables.",
    );
  }

  // Select models for the debate (for now, just use all available models)
  const debateModels = availableModels.map((m) => m.name);
  const modelMapping = createModelMapping(debateModels);
  const { idToModel, modelToId } = modelMapping;

  // Set up token budget if specified
  const tokenBudget =
    config.maxTotalTokens > 0 ? createTokenBudget(config.maxTotalTokens) : null;

  // Set up the debate context
  const debateContext = {
    userPrompt: options.userPrompt,
    candidates: [] as string[],
    critiques: [] as string[],
    round: 1,
  };

  // Track which model created each candidate
  const candidateModelMapping: {
    candidateIndex: number;
    modelId: string;
    modelName: string;
  }[] = [];

  await sendNotification({
    level: "info",
    data: `Starting debate for ${options.toolType} with ${debateModels.length} models and ${config.rounds} rounds`,
  });

  let finalOutput = "";
  let judgeResult: any = null;

  // 3. Single-Model Shortcut
  if (debateModels.length === 1) {
    await sendNotification({
      level: "info",
      data: "Only one model available. Using simplified single-model flow.",
    });

    // Just generate a single candidate and validate it
    const modelId = Object.keys(idToModel)[0];
    const modelName = idToModel[modelId];
    const generatePrompt = strategy.getPrompt("generate", debateContext);

    // Record the prompt in the transcript
    addTranscript(`[SINGLE-MODEL GENERATE]\nPrompt:\n${generatePrompt}\n`);

    try {
      // Generate the candidate
      const response = await timePhase<ModelResponse>("generate", async () => {
        const rawResponse = await sendToModel(
          generatePrompt,
          {
            modelName,
            modelType: modelName.includes("gemini") ? "gemini" : "openai",
            tokenCount: generatePrompt.length / 4,
          },
          sendNotification,
        );
        return parseModelResponse(rawResponse, generatePrompt.length);
      });

      // Record token usage
      totalPromptTokens += response.tokenUsage.prompt;
      totalCompletionTokens += response.tokenUsage.completion;
      if (tokenBudget) {
        tokenBudget.recordUsage(response.tokenUsage.total);
      }

      // Record the response in the transcript
      addTranscript(`[SINGLE-MODEL RESPONSE]\n${response.text}\n`);

      // Use the response as final output
      finalOutput = response.text;
    } catch (error) {
      // Handle generation failure
      addWarning(
        "GEN_FAIL",
        `Generation failed: ${error instanceof Error ? error.message : String(error)}`,
        "generate",
      );

      // No candidates to choose from, so return error message
      finalOutput = `Error generating output: ${error instanceof Error ? error.message : String(error)}`;
    }
  } else {
    // 4. Multi-Model Debate Loop
    for (let round = 1; round <= config.rounds; round++) {
      await sendNotification({
        level: "info",
        data: `Starting debate round ${round}/${config.rounds}`,
      });

      // Update debate context for this round
      debateContext.round = round;

      // Check token budget before starting the round
      if (tokenBudget && !tokenBudget.beginRound(50000)) {
        // Rough estimate
        addWarning(
          "TOKEN_BUDGET",
          "Token budget exceeded. Ending debate early.",
          "generate",
        );
        break;
      }

      // a. Generation phase
      if (round === 1) {
        await sendNotification({
          level: "info",
          data: "Generation phase: Creating initial candidates...",
        });

        const modelIds = Object.keys(idToModel);
        const generationTasks = modelIds.map((modelId) => {
          const modelName = idToModel[modelId];
          const modelType = modelName.includes("gemini") ? "gemini" : "openai";

          return async () => {
            const generatePrompt = strategy.getPrompt("generate", {
              ...debateContext,
              round: parseInt(modelId, 36) || round, // Use the model ID as a numeric identifier
            });

            // Record the prompt in the transcript
            addTranscript(
              `[GENERATE MODEL ${modelId}]\nPrompt:\n${generatePrompt}\n`,
            );

            try {
              // Check if we're in test mode with API calls skipped
              if (process.env.SKIP_API_CALLS === "true") {
                // Create a mock response for testing
                const mockResponse = `Mock ${modelName} response for phase: generate\nModel: ${modelName}\nPrompt length: ${generatePrompt.length}\nThis is test output only.`;
                const response = parseModelResponse(
                  mockResponse,
                  generatePrompt.length,
                );

                // Record token usage (mocked)
                totalPromptTokens += response.tokenUsage.prompt;
                totalCompletionTokens += response.tokenUsage.completion;
                if (tokenBudget) {
                  tokenBudget.recordUsage(response.tokenUsage.total);
                }

                // Record the mock response in the transcript
                addTranscript(
                  `[GENERATE RESPONSE MODEL ${modelId} (MOCK)]\n${response.text}\n`,
                );

                // Add delay to simulate API call
                await new Promise((resolve) => setTimeout(resolve, 500));

                // Store the mock candidate
                return response.text;
              } else {
                // Make actual API call
                const rawResponse = await sendToModel(
                  generatePrompt,
                  {
                    modelName,
                    modelType,
                    tokenCount: generatePrompt.length / 4,
                  },
                  sendNotification,
                );
                const response = parseModelResponse(
                  rawResponse,
                  generatePrompt.length,
                );

                // Record token usage
                totalPromptTokens += response.tokenUsage.prompt;
                totalCompletionTokens += response.tokenUsage.completion;
                if (tokenBudget) {
                  tokenBudget.recordUsage(response.tokenUsage.total);
                }

                // Record the response in the transcript
                addTranscript(
                  `[GENERATE RESPONSE MODEL ${modelId}]\n${response.text}\n`,
                );

                // Store the candidate
                return response.text;
              }
            } catch (error) {
              // If generation fails, log but continue with other models
              addWarning(
                "GEN_FAIL",
                `Generation failed for model ${modelId}: ${error instanceof Error ? error.message : String(error)}`,
                "generate",
              );
              return null;
            }
          };
        });

        // Run generation tasks in parallel with a parallelism of 3
        const batches = chunkArray(generationTasks, 3);
        const candidateResults: (string | null)[] = [];

        await timePhase("generate", async () => {
          for (const batch of batches) {
            const batchResults = await Promise.all(batch.map((task) => task()));
            candidateResults.push(...batchResults);
          }
        });

        // Filter out null results and track which model created each candidate
        let candidateIndex = 0;
        const availableModelIds = Object.keys(idToModel);
        debateContext.candidates = [];
        candidateModelMapping.length = 0; // Clear any existing mappings

        candidateResults.forEach((result, resultIndex) => {
          if (result !== null) {
            debateContext.candidates.push(result);
            const modelId = availableModelIds[resultIndex];
            const modelName = idToModel[modelId];
            candidateModelMapping.push({
              candidateIndex,
              modelId,
              modelName,
            });
            candidateIndex++;
          }
        });

        if (debateContext.candidates.length === 0) {
          throw new Error(
            "All model generations failed. Cannot continue debate.",
          );
        }
      }

      // Skip critique on the final round
      if (round < config.rounds) {
        // b. Critique phase
        await sendNotification({
          level: "info",
          data: "Critique phase: Evaluating candidates...",
        });

        if (config.logLevel === "debug") {
          await sendNotification({
            level: "debug",
            data: `Starting critique phase with candidates: ${debateContext.candidates.length}`,
          });
        }

        const modelIds = Object.keys(idToModel);

        if (config.logLevel === "debug") {
          await sendNotification({
            level: "debug",
            data: `Model IDs for critique: ${modelIds.join(", ")}`,
          });
        }

        const critiqueTasks = modelIds.map((modelId) => {
          const modelName = idToModel[modelId];
          const modelType = modelName.includes("gemini") ? "gemini" : "openai";

          return async () => {
            // For critique, each model critiques all candidates
            const critiquePrompt = strategy.getPrompt("critique", {
              ...debateContext,
              round: parseInt(modelId, 36) || round, // Use the model ID as a numeric identifier
            });

            // Record the prompt in the transcript
            addTranscript(
              `[CRITIQUE MODEL ${modelId}]\nPrompt:\n${critiquePrompt}\n`,
            );

            try {
              // Check if we're in test mode with API calls skipped
              if (process.env.SKIP_API_CALLS === "true") {
                // Create a mock response for testing
                const mockResponse = `Mock ${modelName} response for phase: critique\nModel: ${modelName}\nPrompt length: ${critiquePrompt.length}\nThis is test output only.`;
                const response = parseModelResponse(
                  mockResponse,
                  critiquePrompt.length,
                );

                // Record token usage (mocked)
                totalPromptTokens += response.tokenUsage.prompt;
                totalCompletionTokens += response.tokenUsage.completion;
                if (tokenBudget) {
                  tokenBudget.recordUsage(response.tokenUsage.total);
                }

                // Record the mock response in the transcript
                addTranscript(
                  `[CRITIQUE RESPONSE MODEL ${modelId} (MOCK)]\n${response.text}\n`,
                );

                // Add delay to simulate API call
                await new Promise((resolve) => setTimeout(resolve, 500));

                // Store the mock critique
                return response.text;
              } else {
                // Make actual API call
                let response;
                try {
                  const rawResponse = await sendToModel(
                    critiquePrompt,
                    {
                      modelName,
                      modelType,
                      tokenCount: critiquePrompt.length / 4,
                    },
                    sendNotification,
                  );
                  response = parseModelResponse(
                    rawResponse,
                    critiquePrompt.length,
                  );
                } catch (critErr) {
                  throw critErr;
                }

                // Record token usage
                totalPromptTokens += response.tokenUsage.prompt;
                totalCompletionTokens += response.tokenUsage.completion;
                if (tokenBudget) {
                  tokenBudget.recordUsage(response.tokenUsage.total);
                }

                // Record the response in the transcript
                addTranscript(
                  `[CRITIQUE RESPONSE MODEL ${modelId}]\n${response.text}\n`,
                );

                // Store the critique
                return response.text;
              }
            } catch (error) {
              // If critique fails, log but continue with other models
              addWarning(
                "GEN_FAIL",
                `Critique failed for model ${modelId}: ${error instanceof Error ? error.message : String(error)}`,
                "critique",
              );
              return null;
            }
          };
        });

        // Run critique tasks in parallel with a parallelism of 3
        const batches = chunkArray(critiqueTasks, 3);
        const critiqueResults: (string | null)[] = [];

        await timePhase("critique", async () => {
          for (const batch of batches) {
            const batchResults = await Promise.all(batch.map((task) => task()));
            critiqueResults.push(...batchResults);
          }
        });

        // Filter out null results (failed critiques)
        debateContext.critiques = critiqueResults.filter(
          (r): r is string => r !== null,
        );
      }
    }

    // c. Judge phase
    await sendNotification({
      level: "info",
      data: "Judge phase: Selecting the best candidate...",
    });

    // Get judge model from configuration
    const toolConfig = getToolConfig(options.toolType);
    const judgeModel = getModelById(toolConfig.judgeModel);

    // Check if we have the API key for the judge model
    const hasJudgeKey =
      judgeModel &&
      ((judgeModel.type === 'anthropic' && !!process.env.ANTHROPIC_API_KEY) ||
        (judgeModel.type === 'openai' && !!process.env.OPENAI_API_KEY) ||
        (judgeModel.type === 'gemini' && !!process.env.GEMINI_API_KEY));

    const judgeModelName =
      hasJudgeKey && judgeModel ? judgeModel.name : debateModels[0]; // Fallback to first debate model if judge unavailable
    const judgeModelType =
      judgeModel && hasJudgeKey ? judgeModel.type : availableModels[0].type;

    // Create the judge prompt
    const judgePrompt = strategy.getPrompt("judge", debateContext);

    // Record the prompt in the transcript
    addTranscript(`[JUDGE]\nPrompt:\n${judgePrompt}\n`);

    try {
      // Get the judge's decision
      const judgeResponse = await timePhase<ModelResponse>(
        "judge",
        async () => {
          // Check if we're in test mode with API calls skipped
          if (process.env.SKIP_API_CALLS === "true") {
            // Create a mock response for testing
            const mockResponse = `Mock ${judgeModelName} response for phase: judge
Winner: Model A's plan
Confidence Score: 0.8
Rationale: This is a mock judge response for testing purposes. In a real debate, this would contain the judge's rationale for selecting the winning candidate.`;

            // Add delay to simulate API call
            await new Promise((resolve) => setTimeout(resolve, 500));

            return parseModelResponse(mockResponse, judgePrompt.length);
          } else {
            // Make actual API call
            const rawResponse = await sendToModel(
              judgePrompt,
              {
                modelName: judgeModelName,
                modelType: judgeModelType,
                tokenCount: judgePrompt.length / 4,
              },
              sendNotification,
            );
            return parseModelResponse(rawResponse, judgePrompt.length);
          }
        },
      );

      // Record token usage
      totalPromptTokens += judgeResponse.tokenUsage.prompt;
      totalCompletionTokens += judgeResponse.tokenUsage.completion;
      if (tokenBudget) {
        tokenBudget.recordUsage(judgeResponse.tokenUsage.total);
      }

      // Record the response in the transcript
      if (process.env.SKIP_API_CALLS === "true") {
        addTranscript(`[JUDGE RESPONSE (MOCK)]\n${judgeResponse.text}\n`);
      } else {
        addTranscript(`[JUDGE RESPONSE]\n${judgeResponse.text}\n`);
      }

      // Parse the judge's decision
      judgeResult = strategy.parseJudge(
        judgeResponse.text,
        debateContext.candidates,
      );

      if (judgeResult.success) {
        if (judgeResult.winnerIdx === -1) {
          // Judge provided its own synthesis
          finalOutput = judgeResponse.text;
        } else {
          // Judge selected a winner from the candidates
          finalOutput = debateContext.candidates[judgeResult.winnerIdx];
        }
      } else {
        // Judge failed to determine a winner
        addWarning("JUDGE_MALFORMED", judgeResult.error, "judge");
        addFallback("judge", judgeResult.error);

        // Fallback to the first candidate
        finalOutput = debateContext.candidates[0];
      }
    } catch (error) {
      // If judge phase fails, fallback to the first candidate
      addWarning(
        "JUDGE_MALFORMED",
        `Judge phase error: ${error instanceof Error ? error.message : String(error)}`,
        "judge",
      );
      addFallback(
        "judge",
        `Error: ${error instanceof Error ? error.message : String(error)}`,
      );

      // Use the first candidate as fallback
      finalOutput = debateContext.candidates[0];
    }
  }

  // 6. Prepare the result metadata
  const meta: DebateMeta = {
    warnings,
    tokenUsage: {
      prompt: totalPromptTokens,
      completion: totalCompletionTokens,
    },
    timings: {
      totalMs: performance.now() - startTime,
      perPhase: phaseTimings,
    },
    strategy: strategy.toolType,
    rounds: debateContext.round,
  };

  // Add winner information if we have a successful judge result
  await sendNotification({
    level: "info",
    data: `Debug: judgeResult exists: ${!!judgeResult}, success: ${judgeResult?.success}, winnerIdx: ${judgeResult?.winnerIdx}`,
  });

  if (judgeResult && judgeResult.success && judgeResult.winnerIdx >= 0) {
    await sendNotification({
      level: "info",
      data: `Looking for winner at index ${judgeResult.winnerIdx} in mapping: ${JSON.stringify(candidateModelMapping)}`,
    });

    const winnerMapping = candidateModelMapping.find(
      (m) => m.candidateIndex === judgeResult.winnerIdx,
    );

    if (winnerMapping) {
      meta.winner = {
        modelId: winnerMapping.modelId,
        modelName: winnerMapping.modelName,
      };
      await sendNotification({
        level: "info",
        data: `Found winner: ${winnerMapping.modelName} (${winnerMapping.modelId})`,
      });
    } else {
      await sendNotification({
        level: "info",
        data: `No winner mapping found for index ${judgeResult.winnerIdx}`,
      });
    }
  }

  // 7. Return the appropriate result shape based on the tool type
  const result: DebateResult = {
    toolType: options.toolType,
    meta,
  } as DebateResult;

  // Include debug logs if requested
  if (config.logLevel === "debug") {
    (result as any).debateLog = {
      transcript,
      fallbacks,
    } as DebateLog;
  }

  // Add the tool-specific result field
  switch (options.toolType) {
    case ToolType.Opinion:
      (result as any).opinion = finalOutput;
      break;
    case ToolType.Review:
      (result as any).review = finalOutput;
      break;
  }

  // 8. Send telemetry if enabled
  if (process.env.SAGE_TELEMETRY === "1") {
    try {
      // Simple anonymous telemetry logging
      console.info(
        JSON.stringify({
          event: "debate_completion",
          toolType: options.toolType,
          rounds: debateContext.round,
          warnings: warnings.length,
          tokens: totalPromptTokens + totalCompletionTokens,
          durationMs: meta.timings.totalMs,
          modelsCount: debateModels.length,
        }),
      );
    } catch (error) {
      // Ignore telemetry errors
    }
  }

  await sendNotification({
    level: "info",
    data: `Debate completed for ${options.toolType} in ${Math.round(meta.timings.totalMs)}ms with ${warnings.length} warnings.`,
  });

  return result;
}
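For orientation, here is a minimal, hedged usage sketch of the exported `runDebate` entry point. It only uses the fields this file actually reads from `DebateOptions` (`toolType`, `userPrompt`, `debate`, `debateConfig`); the real `DebateOptions` type in `../types/public` may require additional fields, and the import paths assume a caller sitting next to this file. At least one of `OPENAI_API_KEY` or `GEMINI_API_KEY` must be set for model discovery; `SKIP_API_CALLS=true` swaps the generation, critique, and judge calls for mocked responses.

```typescript
// Usage sketch (not part of the repo): assumes DebateOptions accepts exactly the
// fields the orchestrator reads and that ToolType exposes an Opinion member, as
// referenced above.
import { runDebate, NotificationFn } from "./debateOrchestrator";
import { ToolType } from "../types/public";

// Forward orchestrator notifications to stderr so they stay out of stdout.
const notify: NotificationFn = async ({ level, data }) => {
  console.error(`[${level}] ${data}`);
};

async function main() {
  const result = await runDebate(
    {
      toolType: ToolType.Opinion,
      userPrompt: "Should we migrate this service from REST to gRPC?",
      debate: true, // must be enabled, otherwise runDebate throws
      debateConfig: {
        rounds: 2,
        maxTotalTokens: 200_000, // 0 disables the token budget
        logLevel: "debug", // also attaches transcript + fallbacks as debateLog
      },
    },
    notify,
  );

  // meta carries warnings, token usage, per-phase timings, and (when the judge
  // picked a candidate) the winning model.
  console.log(JSON.stringify(result.meta, null, 2));
}

main().catch((err) => console.error(err));
```

Note that `logLevel: "debug"` is what populates both the transcript and the fallback records; at the default `"warn"` level those arrays stay empty and no `debateLog` is attached to the result.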
