ChatContextBuilder.ts
/**
 * ChatContextBuilder - Main orchestrator for chat context optimization
 *
 * Implements multiple strategies for building optimal message context:
 * - full: Include all messages (no optimization)
 * - last-n: Include only the last N messages
 * - summary+recent: Combine conversation summary with recent messages
 * - span-retrieval: Use embedding-based retrieval for relevant spans + recent
 *
 * @module services/chat/ChatContextBuilder
 */

import { db } from '../../db/postgres.js';
import { logger } from '../../logging/logger.js';
import { tokenEstimator, estimateTokensSync } from './TokenEstimator.js';
import { embeddingService } from './EmbeddingService.js';
import { SpanRetriever, SpanRetrievalConfig } from './SpanRetriever.js';

// ============================================================================
// Types & Interfaces
// ============================================================================

/**
 * Context building strategy
 */
export type ChatContextStrategy = 'full' | 'last-n' | 'summary+recent' | 'span-retrieval';

/**
 * Configuration for ChatContextBuilder
 */
export interface ChatContextConfig {
  /** Strategy to use for context building */
  strategy: ChatContextStrategy;
  /** Maximum tokens allowed for the entire prompt (system + context + user message) */
  maxPromptTokens: number;
  /** Minimum number of recent messages to always include */
  recentMinMessages: number;
  /** Maximum recent messages to include (for last-n strategy) */
  recentMaxMessages: number;
  /** Number of top similar spans to retrieve (for span-retrieval) */
  spanTopK: number;
  /** Radius around each hit to expand (for span-retrieval) */
  spanRadius: number;
  /** Fraction of token budget for span retrieval vs recent (0.0-1.0) */
  spanBudgetRatio: number;
  /** Minimum similarity score for span retrieval */
  spanMinSimilarity: number;
  /** Token threshold to trigger summarization */
  summarizationThreshold: number;
  /** System prompt to prepend (optional) */
  systemPrompt?: string;
}

/**
 * A single message in the context
 */
export interface ContextMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
  name?: string;
  tokenEstimate?: number;
}

/**
 * Parameters for building context
 */
export interface BuildContextParams {
  /** Conversation/thread ID */
  conversationId: string;
  /** The current user message to respond to */
  currentUserMessage: string;
  /** Model ID (affects token estimation) */
  modelId?: string;
  /** Override config for this specific build */
  configOverrides?: Partial<ChatContextConfig>;
  /** Project ID for config resolution */
  projectId?: string;
  /** Tool ID for config resolution */
  toolId?: string;
}

/**
 * Result of building context
 */
export interface BuildContextResult {
  /** The final messages array to send to LLM */
  messages: ContextMessage[];
  /** Total estimated tokens */
  totalTokens: number;
  /** Strategy that was used */
  strategyUsed: ChatContextStrategy;
  /** Whether summarization was triggered */
  summarizationTriggered: boolean;
  /** Number of messages included */
  messageCount: number;
  /** Metadata for observability */
  metadata: {
    recentMessagesIncluded: number;
    spansRetrieved: number;
    summaryIncluded: boolean;
    tokenBudget: number;
    tokenUsed: number;
  };
}
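// ============================================================================
// Example (illustrative sketch only — not consumed by this module; the ID,
// message text, and override values below are hypothetical)
// ============================================================================

export const EXAMPLE_BUILD_CONTEXT_PARAMS: BuildContextParams = {
  conversationId: '00000000-0000-0000-0000-000000000000', // hypothetical ID
  currentUserMessage: 'What did we decide about the rollout plan?',
  modelId: 'gpt-4',
  configOverrides: {
    // Force embedding-based retrieval for this turn and raise the budget
    strategy: 'span-retrieval',
    maxPromptTokens: 8192,
  },
};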
// ============================================================================
// Default Configuration
// ============================================================================

export const DEFAULT_CHAT_CONTEXT_CONFIG: ChatContextConfig = {
  strategy: 'summary+recent',
  maxPromptTokens: 4096,
  recentMinMessages: 4,
  recentMaxMessages: 20,
  spanTopK: 5,
  spanRadius: 2,
  spanBudgetRatio: 0.4, // 40% for spans, 60% for recent
  spanMinSimilarity: 0.7,
  summarizationThreshold: 2000, // Summarize when old messages exceed this
  systemPrompt: undefined,
};

// ============================================================================
// ChatContextBuilder Class
// ============================================================================

export class ChatContextBuilder {
  private spanRetriever: SpanRetriever;

  constructor() {
    this.spanRetriever = new SpanRetriever(embeddingService, tokenEstimator);
  }

  /**
   * Build optimized context for a chat completion request
   */
  async buildContext(params: BuildContextParams): Promise<BuildContextResult> {
    const startTime = Date.now();
    const config = this.resolveConfig(params);

    logger.debug('[ChatContextBuilder] Building context', {
      conversationId: params.conversationId,
      strategy: config.strategy,
      maxTokens: config.maxPromptTokens,
    });

    try {
      let result: BuildContextResult;

      switch (config.strategy) {
        case 'full':
          result = await this.buildFullContext(params, config);
          break;
        case 'last-n':
          result = await this.buildLastNContext(params, config);
          break;
        case 'summary+recent':
          result = await this.buildSummaryRecentContext(params, config);
          break;
        case 'span-retrieval':
          result = await this.buildSpanRetrievalContext(params, config);
          break;
        default:
          // Fallback to summary+recent
          result = await this.buildSummaryRecentContext(params, config);
      }

      const duration = Date.now() - startTime;

      // Log detailed context information for debugging
      logger.info('[ChatContextBuilder] Context built successfully', {
        conversationId: params.conversationId,
        strategy: result.strategyUsed,
        totalTokens: result.totalTokens,
        messageCount: result.messageCount,
        durationMs: duration,
        metadata: result.metadata,
      });

      // Log the actual messages being sent (preview)
      logger.debug('[ChatContextBuilder] Messages in context', {
        conversationId: params.conversationId,
        messages: result.messages.map((msg, idx) => ({
          index: idx,
          role: msg.role,
          contentPreview: msg.content.substring(0, 150) + (msg.content.length > 150 ? '...' : ''),
          tokens: msg.tokenEstimate,
        })),
      });

      return result;
    } catch (error) {
      logger.error('[ChatContextBuilder] Failed to build context', {
        conversationId: params.conversationId,
        error: error instanceof Error ? error.message : String(error),
      });

      // Fallback: return just the current message
      return this.buildFallbackContext(params, config);
    }
  }
  /**
   * Resolve configuration from various sources
   */
  private resolveConfig(params: BuildContextParams): ChatContextConfig {
    // Start with defaults
    let config = { ...DEFAULT_CHAT_CONTEXT_CONFIG };

    // TODO: Load project-specific config from DB
    // if (params.projectId) {
    //   const projectConfig = await this.loadProjectConfig(params.projectId);
    //   config = { ...config, ...projectConfig };
    // }

    // TODO: Load tool-specific config
    // if (params.toolId) {
    //   const toolConfig = await this.loadToolConfig(params.toolId);
    //   config = { ...config, ...toolConfig };
    // }

    // TODO: Load model-specific limits
    // if (params.modelId) {
    //   const modelLimits = await this.loadModelLimits(params.modelId);
    //   config.maxPromptTokens = Math.min(config.maxPromptTokens, modelLimits.contextWindow);
    // }

    // Apply runtime overrides
    if (params.configOverrides) {
      config = { ...config, ...params.configOverrides };
    }

    return config;
  }

  // --------------------------------------------------------------------------
  // Strategy: Full (no optimization)
  // --------------------------------------------------------------------------

  private async buildFullContext(
    params: BuildContextParams,
    config: ChatContextConfig
  ): Promise<BuildContextResult> {
    const messages: ContextMessage[] = [];
    let totalTokens = 0;

    // Add system prompt if configured
    if (config.systemPrompt) {
      const systemTokens = estimateTokensSync(config.systemPrompt, params.modelId);
      messages.push({ role: 'system', content: config.systemPrompt, tokenEstimate: systemTokens });
      totalTokens += systemTokens;
    }

    // Load all messages from conversation
    const allMessages = await this.loadAllMessages(params.conversationId);

    for (const msg of allMessages) {
      const tokenEstimate = msg.token_estimate || estimateTokensSync(msg.content, params.modelId);
      messages.push({
        role: msg.role,
        content: msg.content,
        tokenEstimate,
      });
      totalTokens += tokenEstimate;
    }

    // Add current user message
    const currentTokens = estimateTokensSync(params.currentUserMessage, params.modelId);
    messages.push({
      role: 'user',
      content: params.currentUserMessage,
      tokenEstimate: currentTokens,
    });
    totalTokens += currentTokens;

    return {
      messages,
      totalTokens,
      strategyUsed: 'full',
      summarizationTriggered: false,
      messageCount: messages.length,
      metadata: {
        recentMessagesIncluded: allMessages.length,
        spansRetrieved: 0,
        summaryIncluded: false,
        tokenBudget: config.maxPromptTokens,
        tokenUsed: totalTokens,
      },
    };
  }
  // --------------------------------------------------------------------------
  // Strategy: Last-N
  // --------------------------------------------------------------------------

  private async buildLastNContext(
    params: BuildContextParams,
    config: ChatContextConfig
  ): Promise<BuildContextResult> {
    const messages: ContextMessage[] = [];
    let totalTokens = 0;

    // Add system prompt if configured
    if (config.systemPrompt) {
      const systemTokens = estimateTokensSync(config.systemPrompt, params.modelId);
      messages.push({ role: 'system', content: config.systemPrompt, tokenEstimate: systemTokens });
      totalTokens += systemTokens;
    }

    // Reserve tokens for current message
    const currentTokens = estimateTokensSync(params.currentUserMessage, params.modelId);
    // Note: availableBudget calculated for future use in pagination
    const _availableBudget = config.maxPromptTokens - totalTokens - currentTokens;

    // Load recent messages (up to maxMessages)
    const recentMessages = await this.loadRecentMessages(
      params.conversationId,
      config.recentMaxMessages
    );

    // Add messages until budget is exhausted
    let recentIncluded = 0;
    for (const msg of recentMessages) {
      const tokenEstimate = msg.token_estimate || estimateTokensSync(msg.content, params.modelId);

      if (totalTokens + tokenEstimate + currentTokens > config.maxPromptTokens) {
        // Budget exceeded, but ensure we include at least minMessages
        if (recentIncluded >= config.recentMinMessages) {
          break;
        }
      }

      messages.push({
        role: msg.role,
        content: msg.content,
        tokenEstimate,
      });
      totalTokens += tokenEstimate;
      recentIncluded++;
    }

    // Add current user message
    messages.push({
      role: 'user',
      content: params.currentUserMessage,
      tokenEstimate: currentTokens,
    });
    totalTokens += currentTokens;

    return {
      messages,
      totalTokens,
      strategyUsed: 'last-n',
      summarizationTriggered: false,
      messageCount: messages.length,
      metadata: {
        recentMessagesIncluded: recentIncluded,
        spansRetrieved: 0,
        summaryIncluded: false,
        tokenBudget: config.maxPromptTokens,
        tokenUsed: totalTokens,
      },
    };
  }
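  // Note on the inclusion loop above (the summary+recent and span-retrieval
  // strategies below reuse the same pattern): a message that would push the
  // running total past maxPromptTokens is still appended while fewer than
  // recentMinMessages have been included, so the prompt may slightly exceed
  // its budget rather than fall below the guaranteed minimum of recent turns.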
  // --------------------------------------------------------------------------
  // Strategy: Summary + Recent
  // --------------------------------------------------------------------------

  private async buildSummaryRecentContext(
    params: BuildContextParams,
    config: ChatContextConfig
  ): Promise<BuildContextResult> {
    const messages: ContextMessage[] = [];
    let totalTokens = 0;
    let summarizationTriggered = false;
    let summaryIncluded = false;

    // Add system prompt if configured
    if (config.systemPrompt) {
      const systemTokens = estimateTokensSync(config.systemPrompt, params.modelId);
      messages.push({ role: 'system', content: config.systemPrompt, tokenEstimate: systemTokens });
      totalTokens += systemTokens;
    }

    // Reserve tokens for current message
    const currentTokens = estimateTokensSync(params.currentUserMessage, params.modelId);

    // Load conversation to get existing summary
    const conversation = await this.loadConversation(params.conversationId);

    // Check if we need to generate/update summary
    const oldMessages = await this.loadOldMessages(params.conversationId, config.recentMaxMessages);
    const oldMessagesTokens = oldMessages.reduce((sum, m) =>
      sum + (m.token_estimate || estimateTokensSync(m.content, params.modelId)), 0
    );

    if (oldMessagesTokens > config.summarizationThreshold && !conversation?.summary) {
      // Trigger summarization asynchronously (don't block the request)
      summarizationTriggered = true;
      this.triggerSummarizationAsync(params.conversationId, oldMessages);
    }

    // Include summary if available
    if (conversation?.summary) {
      const summaryTokens = conversation.summary_token_estimate ||
        estimateTokensSync(conversation.summary, params.modelId);

      // Wrap summary in a system or assistant message
      const summaryMessage: ContextMessage = {
        role: 'system',
        content: `[Previous conversation summary]\n${conversation.summary}`,
        tokenEstimate: summaryTokens,
      };
      messages.push(summaryMessage);
      totalTokens += summaryTokens;
      summaryIncluded = true;
    }

    // Calculate remaining budget for recent messages
    // Note: recentBudget calculated for reference
    const _recentBudget = config.maxPromptTokens - totalTokens - currentTokens;

    // Load recent messages
    const recentMessages = await this.loadRecentMessages(
      params.conversationId,
      config.recentMaxMessages
    );

    // Add recent messages within budget
    let recentIncluded = 0;
    for (const msg of recentMessages) {
      const tokenEstimate = msg.token_estimate || estimateTokensSync(msg.content, params.modelId);

      if (totalTokens + tokenEstimate + currentTokens > config.maxPromptTokens) {
        if (recentIncluded >= config.recentMinMessages) {
          break;
        }
      }

      messages.push({
        role: msg.role,
        content: msg.content,
        tokenEstimate,
      });
      totalTokens += tokenEstimate;
      recentIncluded++;
    }

    // Add current user message
    messages.push({
      role: 'user',
      content: params.currentUserMessage,
      tokenEstimate: currentTokens,
    });
    totalTokens += currentTokens;

    return {
      messages,
      totalTokens,
      strategyUsed: 'summary+recent',
      summarizationTriggered,
      messageCount: messages.length,
      metadata: {
        recentMessagesIncluded: recentIncluded,
        spansRetrieved: 0,
        summaryIncluded,
        tokenBudget: config.maxPromptTokens,
        tokenUsed: totalTokens,
      },
    };
  }
  // --------------------------------------------------------------------------
  // Strategy: Span Retrieval
  // --------------------------------------------------------------------------

  private async buildSpanRetrievalContext(
    params: BuildContextParams,
    config: ChatContextConfig
  ): Promise<BuildContextResult> {
    const messages: ContextMessage[] = [];
    let totalTokens = 0;
    let summarizationTriggered = false;
    let summaryIncluded = false;

    // Add system prompt if configured
    if (config.systemPrompt) {
      const systemTokens = estimateTokensSync(config.systemPrompt, params.modelId);
      messages.push({ role: 'system', content: config.systemPrompt, tokenEstimate: systemTokens });
      totalTokens += systemTokens;
    }

    // Reserve tokens for current message
    const currentTokens = estimateTokensSync(params.currentUserMessage, params.modelId);

    // Load conversation for summary
    const conversation = await this.loadConversation(params.conversationId);

    // Include summary if available
    if (conversation?.summary) {
      const summaryTokens = conversation.summary_token_estimate ||
        estimateTokensSync(conversation.summary, params.modelId);

      const summaryMessage: ContextMessage = {
        role: 'system',
        content: `[Previous conversation summary]\n${conversation.summary}`,
        tokenEstimate: summaryTokens,
      };
      messages.push(summaryMessage);
      totalTokens += summaryTokens;
      summaryIncluded = true;
    }

    // Calculate budget splits
    const availableBudget = config.maxPromptTokens - totalTokens - currentTokens;
    const spanBudget = Math.floor(availableBudget * config.spanBudgetRatio);
    // Note: recentBudget calculated for potential future use in pagination
    const _recentBudget = availableBudget - spanBudget;

    // Retrieve relevant spans using embeddings
    const spanConfig: SpanRetrievalConfig = {
      topK: config.spanTopK,
      radius: config.spanRadius,
      tokenBudget: spanBudget,
      minSimilarity: config.spanMinSimilarity,
    };

    let spansRetrieved = 0;
    try {
      const spanResult = await this.spanRetriever.retrieveSpans({
        conversationId: params.conversationId,
        queryText: params.currentUserMessage,
        config: spanConfig,
      });

      // Add span messages (marked with context)
      if (spanResult.allMessages.length > 0) {
        const spanContent = spanResult.allMessages.map(s =>
          `[${s.role}]: ${s.content}`
        ).join('\n---\n');

        const spanTokens = estimateTokensSync(spanContent, params.modelId);
        messages.push({
          role: 'system',
          content: `[Relevant context from earlier in conversation]\n${spanContent}`,
          tokenEstimate: spanTokens,
        });
        totalTokens += spanTokens;
        spansRetrieved = spanResult.allMessages.length;
      }
    } catch (error) {
      logger.warn('[ChatContextBuilder] Span retrieval failed, falling back to recent only', {
        error: error instanceof Error ? error.message : String(error),
      });
    }

    // Add recent messages
    const recentMessages = await this.loadRecentMessages(
      params.conversationId,
      config.recentMaxMessages
    );

    let recentIncluded = 0;
    for (const msg of recentMessages) {
      const tokenEstimate = msg.token_estimate || estimateTokensSync(msg.content, params.modelId);

      if (totalTokens + tokenEstimate + currentTokens > config.maxPromptTokens) {
        if (recentIncluded >= config.recentMinMessages) {
          break;
        }
      }

      messages.push({
        role: msg.role,
        content: msg.content,
        tokenEstimate,
      });
      totalTokens += tokenEstimate;
      recentIncluded++;
    }

    // Add current user message
    messages.push({
      role: 'user',
      content: params.currentUserMessage,
      tokenEstimate: currentTokens,
    });
    totalTokens += currentTokens;

    // Check if summarization should be triggered
    const oldMessages = await this.loadOldMessages(params.conversationId, config.recentMaxMessages);
    const oldMessagesTokens = oldMessages.reduce((sum, m) =>
      sum + (m.token_estimate || estimateTokensSync(m.content, params.modelId)), 0
    );

    if (oldMessagesTokens > config.summarizationThreshold && !conversation?.summary) {
      summarizationTriggered = true;
      this.triggerSummarizationAsync(params.conversationId, oldMessages);
    }

    return {
      messages,
      totalTokens,
      strategyUsed: 'span-retrieval',
      summarizationTriggered,
      messageCount: messages.length,
      metadata: {
        recentMessagesIncluded: recentIncluded,
        spansRetrieved,
        summaryIncluded,
        tokenBudget: config.maxPromptTokens,
        tokenUsed: totalTokens,
      },
    };
  }

  // --------------------------------------------------------------------------
  // Fallback Context
  // --------------------------------------------------------------------------

  private buildFallbackContext(
    params: BuildContextParams,
    config: ChatContextConfig
  ): BuildContextResult {
    const messages: ContextMessage[] = [];
    let totalTokens = 0;

    // Add system prompt if configured
    if (config.systemPrompt) {
      const systemTokens = estimateTokensSync(config.systemPrompt, params.modelId);
      messages.push({ role: 'system', content: config.systemPrompt, tokenEstimate: systemTokens });
      totalTokens += systemTokens;
    }

    // Add only current user message
    const currentTokens = estimateTokensSync(params.currentUserMessage, params.modelId);
    messages.push({
      role: 'user',
      content: params.currentUserMessage,
      tokenEstimate: currentTokens,
    });
    totalTokens += currentTokens;

    return {
      messages,
      totalTokens,
      strategyUsed: 'full',
      summarizationTriggered: false,
      messageCount: messages.length,
      metadata: {
        recentMessagesIncluded: 0,
        spansRetrieved: 0,
        summaryIncluded: false,
        tokenBudget: config.maxPromptTokens,
        tokenUsed: totalTokens,
      },
    };
  }
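  // Worked example for the budget split in buildSpanRetrievalContext above
  // (a sketch using the defaults, with hypothetical token counts): given
  // maxPromptTokens = 4096, no system prompt or stored summary (totalTokens = 0)
  // and a current message estimated at 96 tokens:
  //   availableBudget = 4096 - 0 - 96          = 4000
  //   spanBudget      = Math.floor(4000 * 0.4) = 1600
  // leaving 2400 tokens of headroom for the recent-message loop that follows
  // the span retrieval step.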
  // --------------------------------------------------------------------------
  // Database Helpers
  // --------------------------------------------------------------------------

  private async loadConversation(conversationId: string): Promise<{
    id: string;
    summary?: string;
    summary_token_estimate?: number;
    summary_updated_at?: Date;
  } | null> {
    try {
      const result = await db.query<{
        id: string;
        summary?: string;
        summary_token_estimate?: number;
        summary_updated_at?: Date;
      }>(
        `SELECT id, summary, summary_token_estimate, summary_updated_at
         FROM conversations
         WHERE id = $1`,
        [conversationId]
      );

      if (!result || result.rows.length === 0) {
        return null;
      }
      return result.rows[0];
    } catch (error) {
      logger.error('[ChatContextBuilder] Failed to load conversation', { conversationId, error });
      return null;
    }
  }

  private async loadAllMessages(conversationId: string): Promise<Array<{
    id: string;
    role: 'user' | 'assistant';
    content: string;
    turn_index: number;
    token_estimate?: number;
  }>> {
    try {
      const result = await db.query<{
        id: string;
        role: 'user' | 'assistant';
        content: string;
        turn_index: number;
        token_estimate?: number;
      }>(
        `SELECT id, role, content, turn_index, token_estimate
         FROM messages
         WHERE conversation_id = $1
         ORDER BY turn_index ASC`,
        [conversationId]
      );

      if (!result) return [];
      return result.rows;
    } catch (error) {
      logger.error('[ChatContextBuilder] Failed to load messages', { conversationId, error });
      return [];
    }
  }

  private async loadRecentMessages(conversationId: string, limit: number): Promise<Array<{
    id: string;
    role: 'user' | 'assistant';
    content: string;
    turn_index: number;
    token_estimate?: number;
  }>> {
    try {
      // Get the most recent N messages, then reverse to maintain chronological order
      const result = await db.query<{
        id: string;
        role: 'user' | 'assistant';
        content: string;
        turn_index: number;
        token_estimate?: number;
      }>(
        `SELECT id, role, content, turn_index, token_estimate
         FROM messages
         WHERE conversation_id = $1
         ORDER BY turn_index DESC
         LIMIT $2`,
        [conversationId, limit]
      );

      if (!result) return [];
      return result.rows.reverse();
    } catch (error) {
      logger.error('[ChatContextBuilder] Failed to load recent messages', { conversationId, error });
      return [];
    }
  }

  private async loadOldMessages(conversationId: string, recentCount: number): Promise<Array<{
    id: string;
    role: 'user' | 'assistant';
    content: string;
    turn_index: number;
    token_estimate?: number;
  }>> {
    try {
      // Get messages older than the most recent N
      const result = await db.query<{
        id: string;
        role: 'user' | 'assistant';
        content: string;
        turn_index: number;
        token_estimate?: number;
      }>(
        `WITH recent AS (
           SELECT turn_index FROM messages
           WHERE conversation_id = $1
           ORDER BY turn_index DESC
           LIMIT $2
         )
         SELECT m.id, m.role, m.content, m.turn_index, m.token_estimate
         FROM messages m
         WHERE m.conversation_id = $1
           AND m.turn_index < COALESCE((SELECT MIN(turn_index) FROM recent), 999999)
           AND m.is_summarized = false
         ORDER BY m.turn_index ASC`,
        [conversationId, recentCount]
      );

      if (!result) return [];
      return result.rows;
    } catch (error) {
      logger.error('[ChatContextBuilder] Failed to load old messages', { conversationId, error });
      return [];
    }
  }

  // --------------------------------------------------------------------------
  // Summarization
  // --------------------------------------------------------------------------

  /**
   * Trigger summarization in background (non-blocking)
   */
  private triggerSummarizationAsync(
    conversationId: string,
    messages: Array<{ role: string; content: string; id: string }>
  ): void {
    // Run summarization without awaiting
    this.generateSummary(conversationId, messages).catch(error => {
      logger.error('[ChatContextBuilder] Async summarization failed', {
        conversationId,
        error: error instanceof Error ? error.message : String(error),
      });
    });
  }
  /**
   * Generate a summary for old messages and store it
   */
  private async generateSummary(
    conversationId: string,
    messages: Array<{ role: string; content: string; id: string }>
  ): Promise<void> {
    if (messages.length === 0) return;

    logger.info('[ChatContextBuilder] Generating summary', {
      conversationId,
      messageCount: messages.length,
    });

    try {
      // Format messages for summarization
      const transcript = messages.map(m =>
        `${m.role.toUpperCase()}: ${m.content}`
      ).join('\n\n');

      const summaryPrompt = `Please provide a concise summary of the following conversation, capturing the key points, decisions made, and important context that would be helpful for continuing the conversation. Keep the summary under 500 words.

Conversation:
${transcript}

Summary:`;

      // Use L0 model for summarization (efficient model)
      let summary: string;
      try {
        // Try to use LLM for summarization
        const { routeRequest } = await import('../../routing/router.js');
        const summaryResult = await routeRequest(
          { prompt: summaryPrompt },
          {
            quality: 'fast',
            complexity: 'low',
            taskType: 'general' as const,
          }
        );
        summary = summaryResult.content;

        logger.debug('[ChatContextBuilder] LLM-based summary generated', {
          conversationId,
          summaryLength: summary.length,
          model: summaryResult.modelId,
        });
      } catch (error) {
        // Fallback to extractive summary if LLM fails
        logger.warn('[ChatContextBuilder] LLM summarization failed, using extractive', {
          conversationId,
          error: error instanceof Error ? error.message : String(error),
        });
        summary = this.createExtractiveSum(messages);
      }

      const summaryTokens = estimateTokensSync(summary);

      // Update conversation with summary
      await db.query(
        `UPDATE conversations
         SET summary = $1, summary_token_estimate = $2, summary_updated_at = NOW()
         WHERE id = $3`,
        [summary, summaryTokens, conversationId]
      );

      // Mark messages as summarized
      const messageIds = messages.map(m => m.id);
      await db.query(
        `UPDATE messages SET is_summarized = true WHERE id = ANY($1)`,
        [messageIds]
      );

      logger.info('[ChatContextBuilder] Summary generated and stored', {
        conversationId,
        summaryTokens,
        messagesMarked: messageIds.length,
      });
    } catch (error) {
      logger.error('[ChatContextBuilder] Failed to generate summary', {
        conversationId,
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  /**
   * Simple extractive summarization, used as a fallback when LLM summarization fails
   */
  private createExtractiveSum(messages: Array<{ role: string; content: string }>): string {
    // Take first and last messages, plus key points
    const parts: string[] = [];

    if (messages.length > 0) {
      parts.push(`Initial topic: ${messages[0].content.slice(0, 200)}...`);
    }

    if (messages.length > 2) {
      const midpoint = Math.floor(messages.length / 2);
      parts.push(`Mid-conversation: ${messages[midpoint].content.slice(0, 150)}...`);
    }

    if (messages.length > 1) {
      const last = messages[messages.length - 1];
      parts.push(`Recent context: ${last.content.slice(0, 200)}...`);
    }

    parts.push(`Total exchanges: ${messages.length} messages`);

    return parts.join('\n\n');
  }
  // --------------------------------------------------------------------------
  // Embedding Generation (for new messages)
  // --------------------------------------------------------------------------

  /**
   * Generate and store embedding for a message
   */
  async generateMessageEmbedding(messageId: string, content: string): Promise<void> {
    try {
      const embeddingResult = await embeddingService.getEmbedding(content);

      await db.query(
        `UPDATE messages SET embedding = $1 WHERE id = $2`,
        [`[${embeddingResult.embedding.join(',')}]`, messageId]
      );

      logger.debug('[ChatContextBuilder] Message embedding stored', { messageId });
    } catch (error) {
      logger.warn('[ChatContextBuilder] Failed to generate message embedding', {
        messageId,
        error: error instanceof Error ? error.message : String(error),
      });
      // Non-critical, don't throw
    }
  }

  /**
   * Batch generate embeddings for messages without embeddings
   */
  async backfillEmbeddings(conversationId: string, batchSize: number = 10): Promise<number> {
    let processed = 0;

    try {
      // Find messages without embeddings
      const result = await db.query<{ id: string; content: string }>(
        `SELECT id, content
         FROM messages
         WHERE conversation_id = $1 AND embedding IS NULL
         ORDER BY turn_index ASC
         LIMIT $2`,
        [conversationId, batchSize]
      );

      if (!result || result.rows.length === 0) {
        return 0;
      }

      // Generate embeddings in batch
      const texts = result.rows.map(r => r.content);
      const embeddings = await embeddingService.getBatchEmbeddings({ texts });

      // Store embeddings
      for (let i = 0; i < result.rows.length; i++) {
        const messageId = result.rows[i].id;
        const embeddingResult = embeddings[i];

        if (embeddingResult) {
          await db.query(
            `UPDATE messages SET embedding = $1 WHERE id = $2`,
            [`[${embeddingResult.embedding.join(',')}]`, messageId]
          );
          processed++;
        }
      }

      logger.info('[ChatContextBuilder] Backfilled embeddings', {
        conversationId,
        processed,
        total: result.rows.length,
      });
    } catch (error) {
      logger.error('[ChatContextBuilder] Embedding backfill failed', {
        conversationId,
        error: error instanceof Error ? error.message : String(error),
      });
    }

    return processed;
  }
}

// ============================================================================
// Singleton Export
// ============================================================================

export const chatContextBuilder = new ChatContextBuilder();
// ============================================================================
// Config Resolution Helper (for integration)
// ============================================================================

/**
 * Resolve chat context config for a specific project/tool/model combination
 * Loads from database with fallback chain: project+tool -> project -> tool -> default
 */
export async function resolveChatContextConfig(
  projectId?: string,
  toolId?: string,
  modelId?: string
): Promise<ChatContextConfig> {
  // Start with defaults
  let config = { ...DEFAULT_CHAT_CONTEXT_CONFIG };

  // Try to load from database
  try {
    // Build query with fallback chain
    const query = `
      SELECT * FROM chat_context_config
      WHERE (
        (project_id = $1 AND tool_id = $2) OR
        (project_id = $1 AND tool_id IS NULL) OR
        (project_id IS NULL AND tool_id = $2) OR
        (project_id IS NULL AND tool_id IS NULL)
      )
      ORDER BY
        CASE
          WHEN project_id IS NOT NULL AND tool_id IS NOT NULL THEN 1
          WHEN project_id IS NOT NULL THEN 2
          WHEN tool_id IS NOT NULL THEN 3
          ELSE 4
        END
      LIMIT 1
    `;

    const result = await db.query<{
      strategy: ChatContextStrategy;
      max_prompt_tokens: number;
      recent_min_messages: number;
      enable_summarization: boolean;
      summary_trigger_tokens: number;
      span_top_k: number;
      span_radius: number;
      span_budget_ratio: number;
    }>(query, [projectId || null, toolId || null]);

    if (result && result.rows.length > 0) {
      const dbConfig = result.rows[0];
      config = {
        strategy: dbConfig.strategy,
        maxPromptTokens: dbConfig.max_prompt_tokens,
        recentMinMessages: dbConfig.recent_min_messages,
        recentMaxMessages: DEFAULT_CHAT_CONTEXT_CONFIG.recentMaxMessages,
        spanTopK: dbConfig.span_top_k,
        spanRadius: dbConfig.span_radius,
        spanBudgetRatio: Number(dbConfig.span_budget_ratio),
        spanMinSimilarity: DEFAULT_CHAT_CONTEXT_CONFIG.spanMinSimilarity,
        summarizationThreshold: dbConfig.summary_trigger_tokens,
        systemPrompt: undefined,
      };

      logger.debug('[ChatContextBuilder] Loaded config from database', {
        projectId,
        toolId,
        strategy: config.strategy,
      });
    }
  } catch (error) {
    logger.debug('[ChatContextBuilder] Could not load config from DB, using defaults', {
      error: error instanceof Error ? error.message : String(error),
    });
  }

  // Adjust token limits based on model
  if (modelId) {
    if (modelId.includes('gpt-4')) {
      config.maxPromptTokens = Math.max(config.maxPromptTokens, 8192);
    } else if (modelId.includes('claude-3-opus')) {
      config.maxPromptTokens = Math.max(config.maxPromptTokens, 150000);
    } else if (modelId.includes('claude-3')) {
      config.maxPromptTokens = Math.max(config.maxPromptTokens, 100000);
    } else if (modelId.includes('claude')) {
      config.maxPromptTokens = Math.max(config.maxPromptTokens, 100000);
    } else if (modelId.includes('gpt-3.5')) {
      config.maxPromptTokens = Math.min(config.maxPromptTokens, 4096);
    }

    logger.debug('[ChatContextBuilder] Adjusted token limit for model', {
      modelId,
      maxPromptTokens: config.maxPromptTokens,
    });
  }

  return config;
}
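// ============================================================================
// Usage sketch (illustrative only — shows one way a caller might combine the
// resolver and the singleton; the model ID and argument names are assumptions)
// ============================================================================

export async function exampleBuildTurnContext(
  conversationId: string,
  userMessage: string
): Promise<BuildContextResult> {
  // Resolve config via the DB-backed fallback chain, then apply it as
  // per-request overrides for a single context build.
  const config = await resolveChatContextConfig(undefined, undefined, 'gpt-4');
  return chatContextBuilder.buildContext({
    conversationId,
    currentUserMessage: userMessage,
    modelId: 'gpt-4',
    configOverrides: config,
  });
}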
