router.ts (22.5 kB)
/**
 * @file Intelligent AI Model Router
 * @description Core routing engine implementing N-layer model selection strategy.
 *
 * **N-Layer Architecture:**
 * - **L0 (Free)**: Local/OSS models (Ollama, etc.) - No API costs
 * - **L1 (Standard)**: OpenRouter, budget APIs - Low cost per token
 * - **L2 (Premium)**: OpenAI GPT-4, Claude - Higher cost, better quality
 * - **L3 (Elite)**: GPT-4o, Claude Opus - Highest cost, best quality
 *
 * **Routing Strategy:**
 * 1. Detect task complexity (low/medium/high) using free model or heuristics
 * 2. Select initial layer based on complexity and quality requirements
 * 3. Pick best model within layer by priority (from database)
 * 4. Escalate to higher layer if response quality is insufficient
 * 5. Optionally cross-check with multiple models for consensus
 *
 * **Priority System:**
 * - Models are sorted by `priority` field (0 = highest, 99 = lowest)
 * - Priority is configured in database via admin dashboard
 * - Within same layer, higher priority models are selected first
 *
 * @see {@link docs/ai-routing-heuristics.md} for detailed routing logic
 */

import {
  ModelLayer,
  getNextLayer,
  LAYERS_IN_ORDER,
  ModelConfig,
} from '../config/models.js';
import { modelConfigService } from '../db/model-config.js';
import { env } from '../config/env.js';
import { logger } from '../logging/logger.js';
import { callLLM } from '../tools/llm/index.js';
import {
  LLMRequest,
  LLMResponse,
  RoutingContext,
  CrossCheckResult,
  TaskComplexity,
} from '../mcp/types.js';

/**
 * Detect task complexity using a free/local LLM model.
 * Falls back to heuristic-based detection if no L0 model available.
 *
 * @param message - The user's input message to analyze
 * @returns Promise resolving to 'low', 'medium', or 'high' complexity
 *
 * @example
 * ```typescript
 * const complexity = await detectComplexity("Hello!");
 * // Returns 'low' for simple greetings
 *
 * const complexity = await detectComplexity("Explain the transformer architecture");
 * // Returns 'high' for technical questions
 * ```
 */
export async function detectComplexity(message: string): Promise<TaskComplexity> {
  try {
    // Use L0 free model for complexity detection (from DB, sorted by priority)
    const models = await modelConfigService.getModelsByLayer('L0');
    if (models.length === 0) {
      logger.warn('No L0 models available for complexity detection, using heuristics');
      return detectComplexityHeuristic(message);
    }

    const complexityModel = models[0]; // Use first model (highest priority)

    const prompt = `Analyze this user message and classify its complexity level.

USER MESSAGE: "${message}"

Classify as:
- "low": Simple greetings, short questions (≤5 words), casual chat
- "medium": General questions, explanations, standard requests
- "high": Complex analysis, code tasks, multi-step reasoning, technical deep-dives

Respond with ONLY ONE WORD: low, medium, or high`;

    const response = await callLLM(
      { prompt, maxTokens: 10, temperature: 0 },
      complexityModel
    );

    const detectedComplexity = response.content.trim().toLowerCase();

    if (['low', 'medium', 'high'].includes(detectedComplexity)) {
      logger.info('Complexity detected by LLM', {
        message: message.substring(0, 50),
        complexity: detectedComplexity,
        model: complexityModel.id
      });
      return detectedComplexity as TaskComplexity;
    }

    logger.warn('Invalid complexity from LLM, using heuristics', { response: detectedComplexity });
    return detectComplexityHeuristic(message);
  } catch (error) {
    logger.error('Complexity detection failed, using heuristics', {
      error: error instanceof Error ? error.message : 'Unknown'
    });
    return detectComplexityHeuristic(message);
  }
}

/**
 * Fallback heuristic-based complexity detection.
 * Used when LLM-based detection fails or no L0 models are available.
 *
 * **Heuristics:**
 * - **Low**: ≤5 words, no code markers, no complex words
 * - **High**: Code markers (```, function, class), complex words (analyze, implement), >50 words
 * - **Medium**: Everything else
 *
 * @param message - The user's input message
 * @returns TaskComplexity based on heuristic analysis
 */
function detectComplexityHeuristic(message: string): TaskComplexity {
  const wordCount = message.split(/\s+/).length;
  const hasCodeMarkers = /```|function|class|import|const|let|var/.test(message);
  const hasComplexWords = /(explain|analyze|compare|evaluate|implement|design|architecture|algorithm)/i.test(message);

  if (wordCount <= 5 && !hasCodeMarkers && !hasComplexWords) {
    return 'low';
  } else if (hasCodeMarkers || hasComplexWords || wordCount > 50) {
    return 'high';
  }
  return 'medium';
}
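
// Illustrative (not part of the original file): how the heuristic fallback above
// classifies a few sample messages, given the word-count thresholds and regexes
// defined in detectComplexityHeuristic().
//
//   detectComplexityHeuristic('Hi there!');
//   // -> 'low'    (2 words, no code markers, no complex words)
//   detectComplexityHeuristic('What time zone does the server use for logs?');
//   // -> 'medium' (9 words, no code markers or complex words)
//   detectComplexityHeuristic('Explain how to implement a caching algorithm in TypeScript');
//   // -> 'high'   (matches "explain", "implement", "algorithm")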

/**
 * Select initial routing layer based on task context.
 *
 * **Selection logic:**
 * 1. User-preferred layer (if specified) - Highest priority
 * 2. Critical quality requirement → L2 (Premium)
 * 3. High complexity + High quality → L1 (Standard)
 * 4. Default → L0 (Free) from environment variable
 *
 * @param context - Routing context containing complexity, quality, and preferences
 * @returns The selected ModelLayer (L0, L1, L2, or L3)
 */
export function selectInitialLayer(context: RoutingContext): ModelLayer {
  // If user specified a preferred layer, use it
  if (context.preferredLayer) {
    return context.preferredLayer;
  }

  // Critical tasks start at higher layer
  if (context.quality === 'critical') {
    return 'L2';
  }

  // High complexity + high quality -> L1
  if (context.complexity === 'high' && context.quality === 'high') {
    return 'L1';
  }

  // Default: start at L0 (cheapest)
  return env.DEFAULT_LAYER as ModelLayer;
}
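
// Illustrative (not part of the original file): how a few RoutingContext shapes map
// to an initial layer under the rules above. Field values are assumptions for the
// example; DEFAULT_LAYER comes from the environment config.
//
//   selectInitialLayer({ complexity: 'low', quality: 'high' } as RoutingContext);
//   // -> env.DEFAULT_LAYER (typically 'L0')
//   selectInitialLayer({ complexity: 'high', quality: 'high' } as RoutingContext);
//   // -> 'L1'
//   selectInitialLayer({ complexity: 'medium', quality: 'critical' } as RoutingContext);
//   // -> 'L2'
//   selectInitialLayer({ preferredLayer: 'L3' } as RoutingContext);
//   // -> 'L3' (user preference always wins)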

/**
 * Pick the best model from a specific layer based on task type and priority.
 * Models are fetched from database, already sorted by priority ASC (0 = highest).
 *
 * **Selection algorithm:**
 * 1. Get all enabled models from the layer (sorted by priority)
 * 2. Filter by task capability (code, reasoning, general)
 * 3. Select first capable model (highest priority)
 * 4. Fallback to L0 if layer has no models
 *
 * @param layer - The model layer to pick from (L0, L1, L2, L3)
 * @param taskType - The type of task ('code', 'reasoning', 'general')
 * @returns Promise resolving to the selected ModelConfig, or undefined if none available
 */
async function pickModelFromLayer(
  layer: ModelLayer,
  taskType: string,
): Promise<ModelConfig | undefined> {
  // Models are already sorted by priority (0 = highest priority) from DB
  const models = await modelConfigService.getModelsByLayer(layer);

  if (models.length === 0) {
    logger.warn(`No models found for layer ${layer}`);

    // If layer is disabled or has no models, fallback to L0 (free tier)
    if (layer !== 'L0') {
      logger.info(`Falling back to L0 (free tier) as ${layer} is unavailable`);
      const l0Models = await modelConfigService.getModelsByLayer('L0');
      if (l0Models.length === 0) {
        logger.error('No L0 models available, cannot proceed');
        return undefined;
      }

      // Filter by capability from L0 models
      const capableL0Models = l0Models.filter((m) => {
        if (taskType === 'code') return m.capabilities.code;
        if (taskType === 'reasoning') return m.capabilities.reasoning;
        return m.capabilities.general;
      });

      if (capableL0Models.length === 0) {
        // Return first model (highest priority) as fallback
        return l0Models[0];
      }

      // Return first capable model (already sorted by priority)
      return capableL0Models[0];
    }

    return undefined;
  }

  // Filter by capability
  const capableModels = models.filter((m) => {
    if (taskType === 'code') return m.capabilities.code;
    if (taskType === 'reasoning') return m.capabilities.reasoning;
    return m.capabilities.general;
  });

  if (capableModels.length === 0) {
    logger.warn(`No capable models found for ${taskType} in layer ${layer}`);
    // Return first model (highest priority) as fallback
    return models[0];
  }

  // Return first capable model (already sorted by priority, 0 = highest)
  logger.info(`Selected model by priority`, {
    layer,
    taskType,
    selectedModel: capableModels[0].id,
    priority: capableModels[0].priority,
    totalCapable: capableModels.length,
  });

  return capableModels[0];
}
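
// Illustrative (not part of the original file): given two hypothetical rows returned
// for a layer by the DB (already sorted by priority ascending), a 'code' task picks
// the first entry whose capabilities.code flag is true. Model ids and priorities
// below are invented for the example.
//
//   [
//     { id: 'example/general-chat', priority: 0, capabilities: { code: false, reasoning: false, general: true } },
//     { id: 'example/coder',        priority: 1, capabilities: { code: true,  reasoning: true,  general: true } },
//   ]
//   // pickModelFromLayer('L1', 'code')    -> 'example/coder' (first code-capable entry)
//   // pickModelFromLayer('L1', 'general') -> 'example/general-chat' (priority 0 wins)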

/**
 * Perform cross-check between multiple models for consensus.
 * Used for high-stakes requests where accuracy is critical.
 *
 * **Cross-check process:**
 * 1. Get all available models in the layer
 * 2. If <2 models, skip cross-check and use single model
 * 3. Query multiple models with same request
 * 4. Compare responses for consensus
 * 5. Return primary response with conflict information
 *
 * @param request - The LLM request to cross-check
 * @param layer - The model layer for cross-checking
 * @param taskType - The type of task for model selection
 * @returns Promise resolving to CrossCheckResult with consensus and conflicts
 */
async function crossCheck(
  request: LLMRequest,
  layer: ModelLayer,
  taskType: string,
): Promise<CrossCheckResult> {
  // Get models from DB, already sorted by priority
  const models = await modelConfigService.getModelsByLayer(layer);

  if (models.length < 2) {
    // Not enough models for cross-check, use single model
    const model = await pickModelFromLayer(layer, taskType);
    if (!model) {
      throw new Error(`No model available for layer ${layer}`);
    }
    const response = await callLLM(request, model);
    return {
      primary: { ...response, routingSummary: '' },
      consensus: response.content,
      conflicts: [],
      routingSummary: `Single model: ${model.id} (${layer})`,
    };
  }

  // Get primary and review models
  const primaryModel = models[0];
  const reviewModel = models[1];

  logger.info('Cross-checking with multiple models', {
    primary: primaryModel.id,
    review: reviewModel.id,
    layer,
  });

  // Call primary model
  const primaryResponse = await callLLM(request, primaryModel);

  // Call review model with modified prompt
  const reviewRequest: LLMRequest = {
    ...request,
    prompt: `Review the following solution and identify any issues, bugs, or improvements:

SOLUTION TO REVIEW:
${primaryResponse.content}

ORIGINAL TASK:
${request.prompt}

Please provide:
1. Overall assessment (good/acceptable/needs-improvement)
2. Specific issues found (if any)
3. Suggestions for improvement (if any)
`,
  };

  const reviewResponse = await callLLM(reviewRequest, reviewModel);

  // Improved conflict detection - check for actual negative assessment
  const reviewLower = reviewResponse.content.toLowerCase();

  // Look for explicit negative assessments
  const hasNeedsImprovement =
    reviewLower.includes('needs-improvement') || reviewLower.includes('needs improvement');
  const hasCriticalIssue =
    (reviewLower.includes('critical') || reviewLower.includes('major')) &&
    (reviewLower.includes('bug') || reviewLower.includes('error'));
  const hasIncorrect =
    reviewLower.includes('incorrect') || reviewLower.includes('wrong') || reviewLower.includes('fails');

  // Only flag as conflict if there are serious issues, not just suggestions
  const hasConflicts = hasNeedsImprovement || hasCriticalIssue || hasIncorrect;
  const conflicts = hasConflicts ? ['Review identified serious issues requiring escalation'] : [];

  // If review found issues, we might want to escalate or use arbitrator
  let consensus = primaryResponse.content;
  let arbitratorResponse: LLMResponse | undefined;

  if (hasConflicts && models.length >= 3) {
    logger.info('Conflicts detected, calling arbitrator');
    const arbitratorModel = models[2];

    const arbitratorRequest: LLMRequest = {
      ...request,
      prompt: `You are an arbitrator. Review these two solutions and decide which is better, or provide an improved solution.

SOLUTION A:
${primaryResponse.content}

REVIEW OF SOLUTION A:
${reviewResponse.content}

ORIGINAL TASK:
${request.prompt}

Provide the best solution:`,
    };

    const arbResponse = await callLLM(arbitratorRequest, arbitratorModel);
    arbitratorResponse = { ...arbResponse, routingSummary: '' };
    consensus = arbResponse.content;
  }

  const routingSummary = arbitratorResponse
    ? `Cross-check (3 models): ${primaryModel.id}, ${reviewModel.id}, ${arbitratorResponse.modelId} (layer ${layer})`
    : `Cross-check (2 models): ${primaryModel.id}, ${reviewModel.id} (layer ${layer})`;

  return {
    primary: { ...primaryResponse, routingSummary: '' },
    review: { ...reviewResponse, routingSummary: '' },
    arbitrator: arbitratorResponse,
    consensus,
    conflicts,
    routingSummary,
  };
}
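
// Illustrative (not part of the original file): examples of review wording and
// whether the keyword checks above would flag a conflict.
//
//   "Overall assessment: good. Minor style suggestions only."
//   // -> no conflict (no negative-assessment keywords)
//   "Needs improvement: the loop condition is wrong."
//   // -> conflict ("needs improvement" and "wrong" both match)
//   "There is a critical bug in the error handling."
//   // -> conflict ("critical" + "bug")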

/**
 * Main routing function with N-layer dynamic routing
 */
export async function routeRequest(
  request: LLMRequest,
  context: RoutingContext,
): Promise<LLMResponse> {
  // Check budget constraints early - if budget is 0, force L0 layer (free models only)
  if (context.budget === 0) {
    logger.info('Budget is 0, forcing L0 layer (free models only)', {
      budget: context.budget,
      forcedLayer: 'L0',
    });

    const model = await pickModelFromLayer('L0', context.taskType);
    if (!model) {
      throw new Error('No free models available in L0 layer');
    }

    // Check if the selected model is actually free
    const isFree = model.id.includes(':free') || (model as any).pricing?.prompt === 0;
    if (!isFree) {
      logger.warn('L0 model is not free, but budget is 0', {
        model: model.id,
        budget: context.budget,
      });
    }

    const response = await callLLM(request, model);
    return {
      ...response,
      routingSummary: `Budget enforcement: ${model.id} (layer L0, free tier only)`,
    };
  }

  // If user specified a preferred model, use it directly (bypass all routing)
  if (context.preferredModel) {
    logger.info('Using user-specified model', {
      model: context.preferredModel,
      skipComplexityDetection: true,
      skipCrossCheck: true,
      skipRouting: true,
    });

    const model = await modelConfigService.getModelById(context.preferredModel);
    if (!model) {
      logger.warn('Preferred model not found, falling back to routing', {
        preferredModel: context.preferredModel,
      });
    } else {
      const response = await callLLM(request, model);
      return {
        ...response,
        routingSummary: `Direct model selection: ${model.id} (layer ${model.layer})`,
      };
    }
  }

  // If user specified preferred layer, use it directly without complexity detection or cross-check
  if (context.preferredLayer) {
    logger.info('Using user-specified layer', {
      layer: context.preferredLayer,
      skipComplexityDetection: true,
      skipCrossCheck: true,
    });

    const model = await pickModelFromLayer(context.preferredLayer, context.taskType);
    if (!model) {
      throw new Error(`No model available for layer ${context.preferredLayer}`);
    }

    const response = await callLLM(request, model);
    return {
      ...response,
      routingSummary: `Direct layer selection: ${model.id} (layer ${context.preferredLayer})`,
    };
  }
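
  // No user overrides apply beyond this point: the initial layer comes from
  // selectInitialLayer(), and cross-checking is only considered for high-complexity tasks.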

  const enableAutoEscalate = context.enableAutoEscalate ?? env.ENABLE_AUTO_ESCALATE;
  let currentLayer = selectInitialLayer(context);

  // Only enable cross-check for high complexity tasks
  const shouldCrossCheck =
    (context.enableCrossCheck ?? env.ENABLE_CROSS_CHECK) && context.complexity === 'high';

  logger.info('Routing request', {
    taskType: context.taskType,
    complexity: context.complexity,
    quality: context.quality,
    initialLayer: currentLayer,
    crossCheck: shouldCrossCheck,
  });

  // Try current layer with cross-check only for high complexity
  if (shouldCrossCheck) {
    const result = await crossCheck(request, currentLayer, context.taskType);

    // If no conflicts, return consensus
    if (result.conflicts.length === 0) {
      return {
        content: result.consensus,
        modelId: result.primary.modelId,
        provider: result.primary.provider,
        inputTokens: result.primary.inputTokens,
        outputTokens: result.primary.outputTokens,
        cost: result.primary.cost,
        routingSummary: result.routingSummary + ' (no conflicts)',
      };
    }

    // Conflicts detected
    logger.warn('Conflicts detected in cross-check', {
      layer: currentLayer,
      conflicts: result.conflicts,
    });

    const nextLayer = getNextLayer(currentLayer);
    const maxLayer = env.MAX_ESCALATION_LAYER as ModelLayer;
    const maxLayerIndex = LAYERS_IN_ORDER.indexOf(maxLayer);
    const currentLayerIndex = LAYERS_IN_ORDER.indexOf(currentLayer);
    const canEscalate = nextLayer && currentLayerIndex < maxLayerIndex;

    // Prevent escalation if budget is 0 (free tier only)
    const allowEscalation = context.budget !== 0;
    if (!allowEscalation && canEscalate) {
      logger.info('Escalation blocked due to budget constraint', {
        budget: context.budget,
        currentLayer,
        suggestedLayer: nextLayer,
        reason: 'Budget is 0, free tier only',
      });
    }

    // If auto-escalate is enabled and we can escalate and budget allows
    if (enableAutoEscalate && canEscalate && allowEscalation) {
      logger.info('Auto-escalating to next layer', {
        from: currentLayer,
        to: nextLayer,
      });

      currentLayer = nextLayer!;

      // Try again at higher layer
      const escalatedResult = await crossCheck(
        request,
        currentLayer,
        context.taskType,
      );

      return {
        content: escalatedResult.consensus,
        modelId: escalatedResult.primary.modelId,
        provider: escalatedResult.primary.provider,
        inputTokens: escalatedResult.primary.inputTokens,
        outputTokens: escalatedResult.primary.outputTokens,
        cost: escalatedResult.primary.cost,
        routingSummary:
          escalatedResult.routingSummary + ` (escalated from ${selectInitialLayer(context)})`,
      };
    }

    // If auto-escalate is disabled but escalation is possible, ask for confirmation
    // But only if budget allows escalation
    if (!enableAutoEscalate && canEscalate && allowEscalation) {
      const isPaidLayer = currentLayer !== 'L0' || (nextLayer && nextLayer !== 'L0');

      if (isPaidLayer) {
        logger.info('Escalation requires user confirmation', {
          currentLayer,
          suggestedLayer: nextLayer,
          reason: 'ENABLE_AUTO_ESCALATE is disabled and conflicts detected',
        });

        // Generate optimized prompt for next layer
        const optimizedPrompt = `[ESCALATED FROM ${currentLayer} TO ${nextLayer}]

ORIGINAL REQUEST:
${request.messages[request.messages.length - 1]?.content || 'N/A'}

CONTEXT FROM ${currentLayer}:
${result.consensus}

CONFLICTS DETECTED:
${result.conflicts.join('\n- ')}

PLEASE PROVIDE:
- A more accurate and detailed response
- Clear resolution of the conflicts
- Higher quality output suitable for ${nextLayer} tier`;

        // Return response with escalation confirmation requirement
        return {
          content: result.consensus,
          modelId: result.primary.modelId,
          provider: result.primary.provider,
          inputTokens: result.primary.inputTokens,
          outputTokens: result.primary.outputTokens,
          cost: result.primary.cost,
          routingSummary: result.routingSummary + ' (conflicts detected - escalation available)',
          requiresEscalationConfirm: true,
          suggestedLayer: nextLayer,
          escalationReason: `Conflicts detected in ${currentLayer} layer. Escalating to ${nextLayer} may provide better results (paid tier).`,
          optimizedPrompt,
        };
      }
    }

    // Return arbitrated result if available, otherwise primary
    const finalContent = result.arbitrator ? result.consensus : result.primary.content;

    return {
      content: finalContent,
      modelId: result.primary.modelId,
      provider: result.primary.provider,
      inputTokens: result.primary.inputTokens,
      outputTokens: result.primary.outputTokens,
      cost: result.primary.cost,
      routingSummary: result.routingSummary + ' (conflicts resolved with arbitrator)',
    };
  } else {
    // No cross-check, just use single model
    const model = await pickModelFromLayer(currentLayer, context.taskType);
    if (!model) {
      throw new Error(`No model available for layer ${currentLayer}`);
    }

    const response = await callLLM(request, model);
    return {
      ...response,
      routingSummary: `Single model: ${model.id} (layer ${currentLayer})`,
    };
  }
}
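
For orientation, a minimal usage sketch of the exported `routeRequest` entry point, assuming only the `LLMRequest` and `RoutingContext` fields this file actually reads (`prompt`, `taskType`, `complexity`, `quality`, `budget`); the import path, field values, and casts are illustrative, not taken from the repository.

```typescript
// Hypothetical calling module - not part of router.ts
import { routeRequest, detectComplexity } from './router.js';
import type { LLMRequest, RoutingContext } from '../mcp/types.js';

async function demo(): Promise<void> {
  // LLMRequest is only partially visible in router.ts (prompt, messages, maxTokens, ...),
  // so the object is cast rather than fully populated.
  const request = {
    prompt: 'Explain how to implement a retry queue with exponential backoff',
  } as LLMRequest;

  // Classify complexity up front (LLM-based with heuristic fallback).
  const complexity = await detectComplexity(request.prompt ?? '');

  const context = {
    taskType: 'reasoning',
    complexity,        // 'high' for this message under the heuristics above
    quality: 'high',   // high complexity + high quality starts routing at L1
    budget: 1,         // any non-zero budget keeps escalation available
  } as RoutingContext;

  const response = await routeRequest(request, context);
  console.log(response.routingSummary);
  console.log(response.content);
}

void demo();
```

With `budget` set to 0 instead, `routeRequest` short-circuits to the L0 free tier and never escalates.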
