/**
* LLM Response Cache Service
*
* Provides persistent caching for LLM responses using Supabase.
* Reduces API costs and improves latency for repeated queries.
*
* Cache Key Strategy:
* - Hash of: normalized message + context type + context id + model
* - Normalized message = lowercase, trimmed, whitespace collapsed
*
 * TTL Strategy:
 * - General queries: 24 hours
 * - Context-specific (MP/bill/lobbying/spending): 6 hours (underlying data may change)
 * - Dashboard/aggregations: 1 hour
 * - Visualizer explanations: 7 days (educational/static)
 * - Default fallback: 12 hours
*/
import crypto from 'crypto';
import { createClient, SupabaseClient } from '@supabase/supabase-js';
import type { ContextType } from '@/lib/types/chat';
// Cache configuration
const CACHE_TTL_HOURS: Record<ContextType | 'default', number> = {
general: 24, // General questions cache for 24 hours
mp: 6, // MP-specific queries may have new data
bill: 6, // Bill status can change
dashboard: 1, // Dashboard aggregations refresh hourly
lobbying: 6, // Lobbying data updates weekly
spending: 6, // Expense data updates quarterly
visualizer: 168, // Visualizer explanations are educational/static - cache for 7 days
default: 12, // Default fallback
};
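/*
 * A quick illustration of TTL resolution (it mirrors the lookup in
 * storeCachedResponse below): known context types map directly, and the
 * `default` entry catches anything unexpected at runtime.
 *
 *   CACHE_TTL_HOURS['visualizer']; // 168: explanations are static for a week
 *   CACHE_TTL_HOURS['dashboard'];  // 1: aggregations go stale quickly
 */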
// Types
/** Chat context supplied by callers; `data` carries context-specific extras (e.g. visualizer step/view). */
interface ChatContext {
  type: ContextType;
  id?: string;
  data?: Record<string, any>;
}
interface CacheEntry {
response_content: string;
tool_calls: any[] | null;
tool_results: any[] | null;
input_tokens: number;
output_tokens: number;
original_cost_usd: number;
}
interface CacheStats {
total_hits: number;
total_misses: number;
hit_rate: number;
total_tokens_saved: number;
total_cost_saved: number;
cache_entries: number;
avg_hit_latency_ms: number;
}
interface CacheResult {
hit: boolean;
entry?: CacheEntry;
latencyMs: number;
}
// Singleton Supabase client for cache operations
let supabaseClient: SupabaseClient | null = null;
function getSupabase(): SupabaseClient {
if (!supabaseClient) {
    // The non-null assertions assume credentials are present; callers
    // should gate cache operations on isCacheEnabled() first.
    supabaseClient = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!,
{
auth: {
autoRefreshToken: false,
persistSession: false,
},
}
);
}
return supabaseClient;
}
/**
* Normalize a message for consistent cache key generation
*/
function normalizeMessage(message: string): string {
return message
.toLowerCase()
.trim()
.replace(/\s+/g, ' ') // Collapse whitespace
.replace(/[^\w\s?]/g, ''); // Remove punctuation except ?
}
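/*
 * For example, normalizeMessage('  How did MPs VOTE on C-5?! ') returns
 * 'how did mps vote on c5?'. Case, extra whitespace, and punctuation
 * other than '?' are discarded, so near-identical phrasings share a key.
 */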
/**
* Generate a cache key from message and context
*/
export function generateCacheKey(
message: string,
  context: ChatContext | undefined,
model: string
): string {
const normalizedMessage = normalizeMessage(message);
const contextType = context?.type || 'general';
const contextId = context?.id || '';
// For visualizer context, include step and view in the cache key
// This ensures each step's explanation is cached separately
let contextExtra = '';
if (contextType === 'visualizer' && context?.data) {
contextExtra = `|${context.data.view || ''}|${context.data.step || ''}`;
}
const payload = `${normalizedMessage}|${contextType}|${contextId}${contextExtra}|${model}`;
return crypto.createHash('sha256').update(payload).digest('hex');
}
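/*
 * Example (a sketch; the model ids are illustrative): normalization makes
 * near-duplicate phrasings collide on one key, while context and model
 * keep distinct answers apart.
 *
 *   const a = generateCacheKey('What is Bill C-5?', { type: 'bill', id: 'c-5' }, 'model-x');
 *   const b = generateCacheKey(' what is bill  C-5? ', { type: 'bill', id: 'c-5' }, 'model-x');
 *   // a === b (same key)
 *   const c = generateCacheKey('What is Bill C-5?', { type: 'bill', id: 'c-5' }, 'model-y');
 *   // c !== a (a different model gets its own entry)
 */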
/**
* Generate a message-only hash for debugging/verification
*/
function generateMessageHash(message: string): string {
return crypto
.createHash('sha256')
.update(normalizeMessage(message))
.digest('hex')
.substring(0, 16);
}
/**
* Check cache for a matching response
*/
export async function getCachedResponse(
message: string,
  context: ChatContext | undefined,
model: string
): Promise<CacheResult> {
const startTime = Date.now();
const cacheKey = generateCacheKey(message, context, model);
try {
const supabase = getSupabase();
const { data, error } = await supabase.rpc('get_cached_response', {
p_cache_key: cacheKey,
});
const latencyMs = Date.now() - startTime;
if (error) {
console.error('[LLMCache] Error checking cache:', error);
return { hit: false, latencyMs };
}
if (data && data.length > 0) {
const entry = data[0];
console.log(`[LLMCache] HIT: ${cacheKey.substring(0, 8)}... (${latencyMs}ms)`);
// Update stats asynchronously (don't block response)
updateCacheStats(true, entry.input_tokens + entry.output_tokens, entry.original_cost_usd, latencyMs)
.catch((err) => console.error('[LLMCache] Stats update error:', err));
return {
hit: true,
entry: {
response_content: entry.response_content,
tool_calls: entry.tool_calls,
tool_results: entry.tool_results,
input_tokens: entry.input_tokens,
output_tokens: entry.output_tokens,
original_cost_usd: entry.original_cost_usd,
},
latencyMs,
};
}
    console.log(`[LLMCache] MISS: ${cacheKey.substring(0, 8)}... (${latencyMs}ms)`);
    // Record the miss as well so hit_rate reflects both outcomes (fire-and-forget)
    updateCacheStats(false, 0, 0, latencyMs)
      .catch((err) => console.error('[LLMCache] Stats update error:', err));
    return { hit: false, latencyMs };
} catch (error) {
console.error('[LLMCache] Cache lookup error:', error);
return { hit: false, latencyMs: Date.now() - startTime };
}
}
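/*
 * Example (a sketch): a chat route can short-circuit on a hit; the header
 * name below is an illustrative convention, not part of this module.
 *
 *   const result = await getCachedResponse(userMessage, { type: 'mp', id: mpId }, model);
 *   if (result.hit && result.entry) {
 *     headers.set('X-LLM-Cache', `HIT; lookup=${result.latencyMs}ms`);
 *     return result.entry.response_content;
 *   }
 */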
/**
* Store a response in the cache
*/
export async function storeCachedResponse(
message: string,
  context: ChatContext | undefined,
model: string,
provider: 'anthropic' | 'openai',
response: {
content: string;
toolCalls?: any[];
toolResults?: any[];
inputTokens: number;
outputTokens: number;
costUsd: number;
}
): Promise<void> {
const cacheKey = generateCacheKey(message, context, model);
const messageHash = generateMessageHash(message);
const contextType = context?.type || 'general';
  // `??` rather than `||` so an explicit 0-hour TTL would still be honored
  const ttlHours = CACHE_TTL_HOURS[contextType] ?? CACHE_TTL_HOURS.default;
try {
const supabase = getSupabase();
const { error } = await supabase.rpc('store_cached_response', {
p_cache_key: cacheKey,
p_message_hash: messageHash,
p_context_type: contextType,
p_context_id: context?.id || null,
p_model: model,
p_provider: provider,
p_response_content: response.content,
p_tool_calls: response.toolCalls || null,
p_tool_results: response.toolResults || null,
p_input_tokens: response.inputTokens,
p_output_tokens: response.outputTokens,
p_cost_usd: response.costUsd,
p_ttl_hours: ttlHours,
p_message_preview: message.substring(0, 200),
});
if (error) {
console.error('[LLMCache] Error storing cache:', error);
return;
}
console.log(`[LLMCache] STORED: ${cacheKey.substring(0, 8)}... (TTL: ${ttlHours}h)`);
} catch (error) {
console.error('[LLMCache] Store error:', error);
}
}
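/*
 * The two functions combine into a standard cache-aside flow. A sketch,
 * where `callLLM` is a hypothetical stand-in for the real provider call:
 *
 *   const cached = await getCachedResponse(message, context, model);
 *   if (cached.hit && cached.entry) return cached.entry.response_content;
 *   const fresh = await callLLM(message, context, model);
 *   await storeCachedResponse(message, context, model, 'anthropic', {
 *     content: fresh.content,
 *     inputTokens: fresh.inputTokens,
 *     outputTokens: fresh.outputTokens,
 *     costUsd: fresh.costUsd,
 *   });
 *   return fresh.content;
 */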
/**
* Update cache statistics
*/
async function updateCacheStats(
isHit: boolean,
tokensSaved: number = 0,
costSaved: number = 0,
latencyMs: number = 0
): Promise<void> {
try {
const supabase = getSupabase();
await supabase.rpc('update_cache_stats', {
p_is_hit: isHit,
p_tokens_saved: tokensSaved,
p_cost_saved: costSaved,
p_latency_ms: latencyMs,
});
} catch (error) {
// Don't throw - stats are non-critical
console.error('[LLMCache] Stats update failed:', error);
}
}
/**
* Get cache statistics summary
*/
export async function getCacheStats(days: number = 30): Promise<CacheStats | null> {
try {
const supabase = getSupabase();
const { data, error } = await supabase.rpc('get_cache_stats_summary', {
p_days: days,
});
if (error) {
console.error('[LLMCache] Error getting stats:', error);
return null;
}
if (data && data.length > 0) {
return {
total_hits: data[0].total_hits,
total_misses: data[0].total_misses,
hit_rate: data[0].hit_rate,
total_tokens_saved: data[0].total_tokens_saved,
total_cost_saved: data[0].total_cost_saved,
cache_entries: data[0].cache_entries,
avg_hit_latency_ms: data[0].avg_hit_latency_ms,
};
}
return null;
} catch (error) {
console.error('[LLMCache] Stats fetch error:', error);
return null;
}
}
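/*
 * Example (a sketch) of surfacing the summary, e.g. from an admin route:
 *
 *   const stats = await getCacheStats(7); // summary over the last 7 days
 *   if (stats) {
 *     console.log(`hits=${stats.total_hits} misses=${stats.total_misses} rate=${stats.hit_rate}`);
 *   }
 */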
/**
* Clear expired cache entries (call periodically)
*/
export async function cleanupExpiredCache(): Promise<number> {
try {
const supabase = getSupabase();
const { data, error } = await supabase.rpc('cleanup_expired_cache');
if (error) {
console.error('[LLMCache] Cleanup error:', error);
return 0;
}
return data || 0;
} catch (error) {
console.error('[LLMCache] Cleanup failed:', error);
return 0;
}
}
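/*
 * Intended to run on a schedule. One option (an assumption, not wired up
 * here) is a cron-triggered Next.js route handler:
 *
 *   export async function GET() {
 *     const removed = await cleanupExpiredCache();
 *     return Response.json({ removed });
 *   }
 */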
/**
* Invalidate cache entries by context
* Useful when underlying data changes (e.g., new Hansard import)
*/
export async function invalidateCacheByContext(
contextType: ContextType,
contextId?: string
): Promise<number> {
try {
const supabase = getSupabase();
    let query = supabase
      .from('llm_response_cache')
      .delete({ count: 'exact' }) // request an exact deleted-row count
      .eq('context_type', contextType);
if (contextId) {
query = query.eq('context_id', contextId);
}
const { error, count } = await query;
if (error) {
console.error('[LLMCache] Invalidation error:', error);
return 0;
}
console.log(`[LLMCache] Invalidated ${count} entries for ${contextType}/${contextId || '*'}`);
return count || 0;
} catch (error) {
console.error('[LLMCache] Invalidation failed:', error);
return 0;
}
}
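/*
 * For example, after a nightly Hansard import (the id is illustrative):
 *
 *   await invalidateCacheByContext('mp', 'mp-12345'); // one MP's entries
 *   await invalidateCacheByContext('bill');           // every bill entry
 */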
/**
* Clear all cache entries (admin only)
*/
export async function clearAllCache(): Promise<number> {
try {
const supabase = getSupabase();
    const { error, count } = await supabase
      .from('llm_response_cache')
      .delete({ count: 'exact' }) // request an exact deleted-row count
      .neq('id', '00000000-0000-0000-0000-000000000000'); // matches every row; no real id equals the nil UUID
if (error) {
console.error('[LLMCache] Clear all error:', error);
return 0;
}
console.log(`[LLMCache] Cleared ${count} cache entries`);
return count || 0;
} catch (error) {
console.error('[LLMCache] Clear failed:', error);
return 0;
}
}
/**
* Check if caching is enabled (Supabase credentials available)
*/
export function isCacheEnabled(): boolean {
return !!(
process.env.NEXT_PUBLIC_SUPABASE_URL &&
process.env.SUPABASE_SERVICE_ROLE_KEY
);
}
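/*
 * Callers should gate on this before any cache call, e.g.:
 *
 *   const cached = isCacheEnabled()
 *     ? await getCachedResponse(message, context, model)
 *     : { hit: false, latencyMs: 0 };
 */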