Skip to main content
Glama
007_chat_context_optimization.sql9.04 kB
-- Migration: Chat Context Optimization -- Description: Add fields for context optimization with summary, token estimation, and embedding support -- Version: 007 -- Date: 2024-12-06 -- -- Purpose: -- 1. Enable progressive summarization of conversation history -- 2. Track token usage per message for budget calculation -- 3. Store embeddings for semantic span retrieval -- 4. Support turn-based message ordering -- Ensure pgvector extension is enabled (should already exist from migration 004) CREATE EXTENSION IF NOT EXISTS vector; -- ========================================== -- CONVERSATIONS TABLE UPDATES -- ========================================== -- Add summary column to store progressive conversation summary ALTER TABLE conversations ADD COLUMN IF NOT EXISTS summary TEXT; COMMENT ON COLUMN conversations.summary IS 'Progressive summary of conversation history for context optimization'; -- Add summary token estimate for quick budget calculation ALTER TABLE conversations ADD COLUMN IF NOT EXISTS summary_token_estimate INTEGER DEFAULT 0; COMMENT ON COLUMN conversations.summary_token_estimate IS 'Estimated token count of the summary for budget calculations'; -- Add last_summarized_at to track when summary was last updated ALTER TABLE conversations ADD COLUMN IF NOT EXISTS last_summarized_at TIMESTAMPTZ; COMMENT ON COLUMN conversations.last_summarized_at IS 'Timestamp of last summary update'; -- ========================================== -- MESSAGES TABLE UPDATES -- ========================================== -- Add turn_index for message ordering within conversation -- This allows efficient retrieval of messages by position ALTER TABLE messages ADD COLUMN IF NOT EXISTS turn_index INTEGER; COMMENT ON COLUMN messages.turn_index IS 'Sequential index of message within conversation (0,1,2,3...)'; -- Add token_estimate for budget calculation without re-tokenizing ALTER TABLE messages ADD COLUMN IF NOT EXISTS token_estimate INTEGER; COMMENT ON COLUMN messages.token_estimate IS 'Estimated token count of message content'; -- Add is_summarized flag to track which messages have been compressed into summary ALTER TABLE messages ADD COLUMN IF NOT EXISTS is_summarized BOOLEAN NOT NULL DEFAULT false; COMMENT ON COLUMN messages.is_summarized IS 'Whether this message has been included in the conversation summary'; -- Add embedding column for semantic similarity search -- Using 1536 dimensions for OpenAI text-embedding-3-small or 384 for all-MiniLM-L6-v2 -- Default to 1536 for compatibility with OpenAI, can be adjusted via config ALTER TABLE messages ADD COLUMN IF NOT EXISTS embedding vector(1536); COMMENT ON COLUMN messages.embedding IS 'Vector embedding for semantic similarity search (span retrieval)'; -- ========================================== -- INDEXES FOR PERFORMANCE -- ========================================== -- Index for efficient turn-based retrieval CREATE INDEX IF NOT EXISTS idx_messages_turn_index ON messages(conversation_id, turn_index ASC); -- Index for finding unsummarized messages CREATE INDEX IF NOT EXISTS idx_messages_is_summarized ON messages(conversation_id, is_summarized) WHERE is_summarized = false; -- Vector index for semantic similarity search (cosine distance) -- Using IVFFlat for approximate nearest neighbor search CREATE INDEX IF NOT EXISTS idx_messages_embedding ON messages USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); -- Index for token budget calculations CREATE INDEX IF NOT EXISTS idx_messages_token_estimate ON messages(conversation_id, token_estimate); -- ========================================== -- BACKFILL TURN_INDEX FOR EXISTING DATA -- ========================================== -- Set turn_index for existing messages based on created_at order -- This ensures backward compatibility with existing data WITH ranked_messages AS ( SELECT id, ROW_NUMBER() OVER ( PARTITION BY conversation_id ORDER BY created_at ASC ) - 1 AS calculated_turn_index FROM messages WHERE turn_index IS NULL ) UPDATE messages m SET turn_index = rm.calculated_turn_index FROM ranked_messages rm WHERE m.id = rm.id; -- ========================================== -- CHAT CONTEXT CONFIG TABLE -- ========================================== -- Table to store per-project/tool context configuration CREATE TABLE IF NOT EXISTS chat_context_config ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- Scope: either project_id OR tool_id, or both project_id VARCHAR(255), tool_id VARCHAR(255), -- Strategy configuration strategy VARCHAR(50) NOT NULL DEFAULT 'summary+recent' CHECK (strategy IN ('full', 'last-n', 'summary+recent', 'span-retrieval')), -- Token budget settings max_prompt_tokens INTEGER DEFAULT 4096, recent_min_messages INTEGER DEFAULT 5, -- Summarization settings enable_summarization BOOLEAN DEFAULT true, summary_trigger_tokens INTEGER DEFAULT 2000, summary_model_id VARCHAR(255), -- Model to use for summarization (default: L0) -- Span retrieval settings span_top_k INTEGER DEFAULT 5, span_radius INTEGER DEFAULT 2, span_budget_ratio DECIMAL(3,2) DEFAULT 0.6, -- Embedding settings embedding_model_id VARCHAR(255), -- Model/service for embeddings embedding_dimension INTEGER DEFAULT 1536, -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- Ensure unique config per scope CONSTRAINT unique_project_tool_config UNIQUE NULLS NOT DISTINCT (project_id, tool_id) ); -- Indexes for config lookup CREATE INDEX IF NOT EXISTS idx_chat_context_config_project ON chat_context_config(project_id); CREATE INDEX IF NOT EXISTS idx_chat_context_config_tool ON chat_context_config(tool_id); -- Trigger for updated_at CREATE TRIGGER update_chat_context_config_updated_at BEFORE UPDATE ON chat_context_config FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); -- Comments COMMENT ON TABLE chat_context_config IS 'Configuration for chat context optimization strategies per project/tool'; COMMENT ON COLUMN chat_context_config.strategy IS 'Context building strategy: full (all messages), last-n (recent only), summary+recent, or span-retrieval'; COMMENT ON COLUMN chat_context_config.span_top_k IS 'Number of semantically similar messages to retrieve'; COMMENT ON COLUMN chat_context_config.span_radius IS 'Number of adjacent messages to include around each retrieved message'; COMMENT ON COLUMN chat_context_config.span_budget_ratio IS 'Fraction of token budget allocated to span retrieval (0.0-1.0)'; -- ========================================== -- INSERT DEFAULT CONFIGURATION -- ========================================== -- Default config (applies when no project/tool specific config exists) INSERT INTO chat_context_config ( project_id, tool_id, strategy, max_prompt_tokens, recent_min_messages, enable_summarization, summary_trigger_tokens, span_top_k, span_radius, span_budget_ratio ) VALUES ( NULL, -- Global default NULL, 'summary+recent', -- Default to summary+recent strategy 4096, 5, true, 2000, 5, 2, 0.6 ) ON CONFLICT (project_id, tool_id) DO NOTHING; -- ========================================== -- HELPER FUNCTIONS -- ========================================== -- Function to get next turn_index for a conversation CREATE OR REPLACE FUNCTION get_next_turn_index(p_conversation_id VARCHAR(255)) RETURNS INTEGER AS $$ DECLARE next_idx INTEGER; BEGIN SELECT COALESCE(MAX(turn_index), -1) + 1 INTO next_idx FROM messages WHERE conversation_id = p_conversation_id; RETURN next_idx; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION get_next_turn_index IS 'Returns the next turn_index for a new message in a conversation'; -- Function to find semantically similar messages CREATE OR REPLACE FUNCTION find_similar_messages( p_conversation_id VARCHAR(255), p_query_embedding vector(1536), p_limit INTEGER DEFAULT 5, p_exclude_turn_index INTEGER DEFAULT NULL ) RETURNS TABLE ( id UUID, role VARCHAR(50), content TEXT, turn_index INTEGER, token_estimate INTEGER, similarity FLOAT ) AS $$ BEGIN RETURN QUERY SELECT m.id, m.role, m.content, m.turn_index, m.token_estimate, 1 - (m.embedding <=> p_query_embedding) AS similarity FROM messages m WHERE m.conversation_id = p_conversation_id AND m.embedding IS NOT NULL AND (p_exclude_turn_index IS NULL OR m.turn_index < p_exclude_turn_index) ORDER BY m.embedding <=> p_query_embedding LIMIT p_limit; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION find_similar_messages IS 'Find messages semantically similar to a query embedding using cosine similarity'; -- ========================================== -- MIGRATION COMPLETE -- ==========================================

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/babasida246/ai-mcp-gateway'

If you have feedback or need assistance with the MCP directory API, please join our Discord server