/**
* LLM Response Cache Service
*
* Provides persistent caching for LLM responses using Supabase.
* Reduces API costs and improves latency for repeated queries.
*
* Cache Key Strategy:
* - Hash of: normalized message + context type + context id + model
* - Normalized message = lowercase, trimmed, whitespace collapsed
*
 * TTL Strategy:
 * - General queries: 24 hours
 * - Context-specific (MP/bill/lobbying/spending): 6 hours (underlying data may change)
 * - Dashboard/aggregations: 1 hour
 * - Visualizer explanations: 7 days (educational/static)
 * - Default fallback: 12 hours
*/
import crypto from 'crypto';
import { createClient, SupabaseClient } from '@supabase/supabase-js';
import type { ContextType } from '@/lib/types/chat';
// Cache configuration
const CACHE_TTL_HOURS: Record<ContextType | 'default', number> = {
general: 24, // General questions cache for 24 hours
mp: 6, // MP-specific queries may have new data
bill: 6, // Bill status can change
dashboard: 1, // Dashboard aggregations refresh hourly
lobbying: 6, // Lobbying data updates weekly
spending: 6, // Expense data updates quarterly
visualizer: 168, // Visualizer explanations are educational/static - cache for 7 days
default: 12, // Default fallback
};
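/*
 * A quick illustration of TTL resolution (it mirrors the lookup in
 * storeCachedResponse below): known context types map directly, and the
 * `default` entry catches anything unexpected at runtime.
 *
 *   CACHE_TTL_HOURS['visualizer']; // 168: explanations are static for a week
 *   CACHE_TTL_HOURS['dashboard'];  // 1: aggregations go stale quickly
 */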
// Types
/** Chat context supplied by callers; `data` carries context-specific extras (e.g. visualizer step/view). */
interface ChatContext {
  type: ContextType;
  id?: string;
  data?: Record<string, any>;
}
interface CacheEntry {
response_content: string;
tool_calls: any[] | null;
tool_results: any[] | null;
input_tokens: number;
output_tokens: number;
original_cost_usd: number;
}
interface CacheStats {
total_hits: number;
total_misses: number;
hit_rate: number;
total_tokens_saved: number;
total_cost_saved: number;
cache_entries: number;
avg_hit_latency_ms: number;
}
interface CacheResult {
hit: boolean;
entry?: CacheEntry;
latencyMs: number;
}
// Singleton Supabase client for cache operations
let supabaseClient: SupabaseClient | null = null;
function getSupabase(): SupabaseClient {
if (!supabaseClient) {
    // The non-null assertions assume credentials are present; callers
    // should gate cache operations on isCacheEnabled() first.
    supabaseClient = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!,
{
auth: {
autoRefreshToken: false,
persistSession: false,
},
}
);
}
return supabaseClient;
}
/**
* Normalize a message for consistent cache key generation
*/
function normalizeMessage(message: string): string {
return message
.toLowerCase()
.trim()
.replace(/\s+/g, ' ') // Collapse whitespace
.replace(/[^\w\s?]/g, ''); // Remove punctuation except ?
}
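/*
 * For example, normalizeMessage('  How did MPs VOTE on C-5?! ') returns
 * 'how did mps vote on c5?'. Case, extra whitespace, and punctuation
 * other than '?' are discarded, so near-identical phrasings share a key.
 */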
/**
* Generate a cache key from message and context
*/
export function generateCacheKey(
message: string,
  context: ChatContext | undefined,
model: string
): string {
const normalizedMessage = normalizeMessage(message);
const contextType = context?.type || 'general';
const contextId = context?.id || '';
// For visualizer context, include step and view in the cache key
// This ensures each step's explanation is cached separately
let contextExtra = '';
if (contextType === 'visualizer' && context?.data) {
contextExtra = `|${context.data.view || ''}|${context.data.step || ''}`;
}
const payload = `${normalizedMessage}|${contextType}|${contextId}${contextExtra}|${model}`;
return crypto.createHash('sha256').update(payload).digest('hex');
}
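/*
 * Example (a sketch; the model ids are illustrative): normalization makes
 * near-duplicate phrasings collide on one key, while context and model
 * keep distinct answers apart.
 *
 *   const a = generateCacheKey('What is Bill C-5?', { type: 'bill', id: 'c-5' }, 'model-x');
 *   const b = generateCacheKey(' what is bill  C-5? ', { type: 'bill', id: 'c-5' }, 'model-x');
 *   // a === b (same key)
 *   const c = generateCacheKey('What is Bill C-5?', { type: 'bill', id: 'c-5' }, 'model-y');
 *   // c !== a (a different model gets its own entry)
 */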
/**
* Generate a message-only hash for debugging/verification
*/
function generateMessageHash(message: string): string {
return crypto
.createHash('sha256')
.update(normalizeMessage(message))
.digest('hex')
.substring(0, 16);
}
/**
* Check cache for a matching response
*/
export async function getCachedResponse(
message: string,
  context: ChatContext | undefined,
model: string
): Promise<CacheResult> {
const startTime = Date.now();
const cacheKey = generateCacheKey(message, context, model);
try {
const supabase = getSupabase();
const { data, error } = await supabase.rpc('get_cached_response', {
p_cache_key: cacheKey,
});
const latencyMs = Date.now() - startTime;
if (error) {
console.error('[LLMCache] Error checking cache:', error);
return { hit: false, latencyMs };
}
if (data && data.length > 0) {
const entry = data[0];
console.log(`[LLMCache] HIT: ${cacheKey.substring(0, 8)}... (${latencyMs}ms)`);
// Update stats asynchronously (don't block response)
updateCacheStats(true, entry.input_tokens + entry.output_tokens, entry.original_cost_usd, latencyMs)
.catch((err) => console.error('[LLMCache] Stats update error:', err));
return {
hit: true,
entry: {
response_content: entry.response_content,
tool_calls: entry.tool_calls,
tool_results: entry.tool_results,
input_tokens: entry.input_tokens,
output_tokens: entry.output_tokens,
original_cost_usd: entry.original_cost_usd,
},
latencyMs,
};
}
    console.log(`[LLMCache] MISS: ${cacheKey.substring(0, 8)}... (${latencyMs}ms)`);
    // Record the miss as well so hit_rate reflects both outcomes (fire-and-forget)
    updateCacheStats(false, 0, 0, latencyMs)
      .catch((err) => console.error('[LLMCache] Stats update error:', err));
    return { hit: false, latencyMs };
} catch (error) {
console.error('[LLMCache] Cache lookup error:', error);
return { hit: false, latencyMs: Date.now() - startTime };
}
}
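/*
 * Example (a sketch): a chat route can short-circuit on a hit; the header
 * name below is an illustrative convention, not part of this module.
 *
 *   const result = await getCachedResponse(userMessage, { type: 'mp', id: mpId }, model);
 *   if (result.hit && result.entry) {
 *     headers.set('X-LLM-Cache', `HIT; lookup=${result.latencyMs}ms`);
 *     return result.entry.response_content;
 *   }
 */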
/**
* Store a response in the cache
*/
export async function storeCachedResponse(
message: string,
  context: ChatContext | undefined,
model: string,
provider: 'anthropic' | 'openai',
response: {
content: string;
toolCalls?: any[];
toolResults?: any[];
inputTokens: number;
outputTokens: number;
costUsd: number;
}
): Promise<void> {
const cacheKey = generateCacheKey(message, context, model);
const messageHash = generateMessageHash(message);
const contextType = context?.type || 'general';
  // `??` rather than `||` so an explicit 0-hour TTL would still be honored
  const ttlHours = CACHE_TTL_HOURS[contextType] ?? CACHE_TTL_HOURS.default;
try {
const supabase = getSupabase();
const { error } = await supabase.rpc('store_cached_response', {
p_cache_key: cacheKey,
p_message_hash: messageHash,
p_context_type: contextType,
p_context_id: context?.id || null,
p_model: model,
p_provider: provider,
p_response_content: response.content,
p_tool_calls: response.toolCalls || null,
p_tool_results: response.toolResults || null,
p_input_tokens: response.inputTokens,
p_output_tokens: response.outputTokens,
p_cost_usd: response.costUsd,
p_ttl_hours: ttlHours,
p_message_preview: message.substring(0, 200),
});
if (error) {
console.error('[LLMCache] Error storing cache:', error);
return;
}
console.log(`[LLMCache] STORED: ${cacheKey.substring(0, 8)}... (TTL: ${ttlHours}h)`);
} catch (error) {
console.error('[LLMCache] Store error:', error);
}
}
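/*
 * The two functions combine into a standard cache-aside flow. A sketch,
 * where `callLLM` is a hypothetical stand-in for the real provider call:
 *
 *   const cached = await getCachedResponse(message, context, model);
 *   if (cached.hit && cached.entry) return cached.entry.response_content;
 *   const fresh = await callLLM(message, context, model);
 *   await storeCachedResponse(message, context, model, 'anthropic', {
 *     content: fresh.content,
 *     inputTokens: fresh.inputTokens,
 *     outputTokens: fresh.outputTokens,
 *     costUsd: fresh.costUsd,
 *   });
 *   return fresh.content;
 */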
/**
* Update cache statistics
*/
async function updateCacheStats(
isHit: boolean,
tokensSaved: number = 0,
costSaved: number = 0,
latencyMs: number = 0
): Promise<void> {
try {
const supabase = getSupabase();
await supabase.rpc('update_cache_stats', {
p_is_hit: isHit,
p_tokens_saved: tokensSaved,
p_cost_saved: costSaved,
p_latency_ms: latencyMs,
});
} catch (error) {
// Don't throw - stats are non-critical
console.error('[LLMCache] Stats update failed:', error);
}
}
/**
* Get cache statistics summary
*/
export async function getCacheStats(days: number = 30): Promise<CacheStats | null> {
try {
const supabase = getSupabase();
const { data, error } = await supabase.rpc('get_cache_stats_summary', {
p_days: days,
});
if (error) {
console.error('[LLMCache] Error getting stats:', error);
return null;
}
if (data && data.length > 0) {
return {
total_hits: data[0].total_hits,
total_misses: data[0].total_misses,
hit_rate: data[0].hit_rate,
total_tokens_saved: data[0].total_tokens_saved,
total_cost_saved: data[0].total_cost_saved,
cache_entries: data[0].cache_entries,
avg_hit_latency_ms: data[0].avg_hit_latency_ms,
};
}
return null;
} catch (error) {
console.error('[LLMCache] Stats fetch error:', error);
return null;
}
}
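/*
 * Example (a sketch) of surfacing the summary, e.g. from an admin route:
 *
 *   const stats = await getCacheStats(7); // summary over the last 7 days
 *   if (stats) {
 *     console.log(`hits=${stats.total_hits} misses=${stats.total_misses} rate=${stats.hit_rate}`);
 *   }
 */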
/**
* Clear expired cache entries (call periodically)
*/
export async function cleanupExpiredCache(): Promise<number> {
try {
const supabase = getSupabase();
const { data, error } = await supabase.rpc('cleanup_expired_cache');
if (error) {
console.error('[LLMCache] Cleanup error:', error);
return 0;
}
return data || 0;
} catch (error) {
console.error('[LLMCache] Cleanup failed:', error);
return 0;
}
}
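/*
 * Intended to run on a schedule. One option (an assumption, not wired up
 * here) is a cron-triggered Next.js route handler:
 *
 *   export async function GET() {
 *     const removed = await cleanupExpiredCache();
 *     return Response.json({ removed });
 *   }
 */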
/**
* Invalidate cache entries by context
* Useful when underlying data changes (e.g., new Hansard import)
*/
export async function invalidateCacheByContext(
contextType: ContextType,
contextId?: string
): Promise<number> {
try {
const supabase = getSupabase();
    let query = supabase
      .from('llm_response_cache')
      .delete({ count: 'exact' }) // request an exact deleted-row count
      .eq('context_type', contextType);
if (contextId) {
query = query.eq('context_id', contextId);
}
const { error, count } = await query;
if (error) {
console.error('[LLMCache] Invalidation error:', error);
return 0;
}
console.log(`[LLMCache] Invalidated ${count} entries for ${contextType}/${contextId || '*'}`);
return count || 0;
} catch (error) {
console.error('[LLMCache] Invalidation failed:', error);
return 0;
}
}
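/*
 * For example, after a nightly Hansard import (the id is illustrative):
 *
 *   await invalidateCacheByContext('mp', 'mp-12345'); // one MP's entries
 *   await invalidateCacheByContext('bill');           // every bill entry
 */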
/**
* Clear all cache entries (admin only)
*/
export async function clearAllCache(): Promise<number> {
try {
const supabase = getSupabase();
    const { error, count } = await supabase
      .from('llm_response_cache')
      .delete({ count: 'exact' }) // request an exact deleted-row count
      .neq('id', '00000000-0000-0000-0000-000000000000'); // matches every row; no real id equals the nil UUID
if (error) {
console.error('[LLMCache] Clear all error:', error);
return 0;
}
console.log(`[LLMCache] Cleared ${count} cache entries`);
return count || 0;
} catch (error) {
console.error('[LLMCache] Clear failed:', error);
return 0;
}
}
/**
* Check if caching is enabled (Supabase credentials available)
*/
export function isCacheEnabled(): boolean {
return !!(
process.env.NEXT_PUBLIC_SUPABASE_URL &&
process.env.SUPABASE_SERVICE_ROLE_KEY
);
}
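/*
 * Callers should gate on this before any cache call, e.g.:
 *
 *   const cached = isCacheEnabled()
 *     ? await getCachedResponse(message, context, model)
 *     : { hit: false, latencyMs: 0 };
 */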