Skip to main content
Glama
enable-pgvector.sql9.24 kB
-- ThoughtMCP pgvector Extension Setup -- This script enables pgvector and creates vector-based tables and indexes -- ============================================================================ -- Enable pgvector Extension -- ============================================================================ CREATE EXTENSION IF NOT EXISTS vector; -- Verify extension is loaded DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN RAISE EXCEPTION 'pgvector extension failed to load'; END IF; RAISE NOTICE 'pgvector extension enabled successfully'; END $$; -- ============================================================================ -- Multi-Sector Embeddings Table -- ============================================================================ -- Stores five-sector embeddings for each memory (HMD architecture) -- Default dimension is 768 (Ollama nomic-embed-text), but can be adjusted CREATE TABLE IF NOT EXISTS memory_embeddings ( memory_id TEXT NOT NULL, sector TEXT NOT NULL, embedding vector(768), -- Default dimension, will support configurable sizes dimension INTEGER NOT NULL DEFAULT 768, model TEXT NOT NULL, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (memory_id, sector), FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE, CONSTRAINT valid_sector CHECK (sector IN ('episodic', 'semantic', 'procedural', 'emotional', 'reflective')), CONSTRAINT valid_dimension CHECK (dimension > 0) ); -- ============================================================================ -- Vector Similarity Indexes -- ============================================================================ -- IVFFlat indexes for fast approximate nearest neighbor search -- Lists parameter: sqrt(total_rows) is a good starting point -- For 100k memories × 5 sectors = 500k embeddings, lists ≈ 707 -- Episodic sector index (temporal, event-based memories) -- Note: WITH clause must come before WHERE clause in PostgreSQL CREATE INDEX IF NOT EXISTS idx_embeddings_episodic_vector ON memory_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100) WHERE sector = 'episodic'; -- Semantic sector index (factual, conceptual knowledge) CREATE INDEX IF NOT EXISTS idx_embeddings_semantic_vector ON memory_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100) WHERE sector = 'semantic'; -- Procedural sector index (how-to, process knowledge) CREATE INDEX IF NOT EXISTS idx_embeddings_procedural_vector ON memory_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100) WHERE sector = 'procedural'; -- Emotional sector index (affective content) CREATE INDEX IF NOT EXISTS idx_embeddings_emotional_vector ON memory_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100) WHERE sector = 'emotional'; -- Reflective sector index (meta-cognitive insights) CREATE INDEX IF NOT EXISTS idx_embeddings_reflective_vector ON memory_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100) WHERE sector = 'reflective'; -- Additional indexes for efficient querying CREATE INDEX IF NOT EXISTS idx_embeddings_memory ON memory_embeddings(memory_id); CREATE INDEX IF NOT EXISTS idx_embeddings_sector ON memory_embeddings(sector); CREATE INDEX IF NOT EXISTS idx_embeddings_model ON memory_embeddings(model); CREATE INDEX IF NOT EXISTS idx_embeddings_updated ON memory_embeddings(updated_at DESC); -- ============================================================================ -- Vector Search Functions -- ============================================================================ -- Function to find similar memories using vector similarity -- Returns memories ranked by composite score: 0.6×similarity + 0.2×salience + 0.1×recency + 0.1×link_weight CREATE OR REPLACE FUNCTION find_similar_memories( p_query_embedding vector(768), p_sector TEXT, p_user_id TEXT, p_limit INTEGER DEFAULT 10, p_similarity_threshold REAL DEFAULT 0.7 ) RETURNS TABLE( memory_id TEXT, content TEXT, similarity REAL, salience REAL, recency_score REAL, composite_score REAL, created_at TIMESTAMP WITH TIME ZONE, last_accessed TIMESTAMP WITH TIME ZONE ) AS $$ BEGIN RETURN QUERY WITH similarity_scores AS ( SELECT e.memory_id, 1 - (e.embedding <=> p_query_embedding) AS similarity_score FROM memory_embeddings e WHERE e.sector = p_sector AND 1 - (e.embedding <=> p_query_embedding) >= p_similarity_threshold ), recency_scores AS ( SELECT m.id, EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - m.last_accessed)) / 86400.0 AS days_since_access, EXP(-0.01 * EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - m.last_accessed)) / 86400.0) AS recency_score FROM memories m WHERE m.user_id = p_user_id ), link_weights AS ( SELECT ml.target_id AS memory_id, AVG(ml.weight) AS avg_link_weight FROM memory_links ml GROUP BY ml.target_id ) SELECT m.id AS memory_id, m.content, ss.similarity_score AS similarity, m.salience, rs.recency_score, (0.6 * ss.similarity_score + 0.2 * m.salience + 0.1 * rs.recency_score + 0.1 * COALESCE(lw.avg_link_weight, 0.0)) AS composite_score, m.created_at, m.last_accessed FROM memories m INNER JOIN similarity_scores ss ON m.id = ss.memory_id INNER JOIN recency_scores rs ON m.id = rs.id LEFT JOIN link_weights lw ON m.id = lw.memory_id WHERE m.user_id = p_user_id ORDER BY composite_score DESC LIMIT p_limit; END; $$ LANGUAGE plpgsql; -- Function to find similar memories across multiple sectors CREATE OR REPLACE FUNCTION find_similar_memories_multi_sector( p_query_embeddings JSONB, -- JSON object with sector names as keys and embeddings as arrays p_sector_weights JSONB, -- JSON object with sector names as keys and weights as values p_user_id TEXT, p_limit INTEGER DEFAULT 10, p_similarity_threshold REAL DEFAULT 0.7 ) RETURNS TABLE( memory_id TEXT, content TEXT, weighted_similarity REAL, composite_score REAL ) AS $$ DECLARE sector_name TEXT; sector_weight REAL; query_embedding vector(768); BEGIN -- This is a placeholder for multi-sector search -- Full implementation will aggregate scores across sectors RAISE NOTICE 'Multi-sector search function placeholder'; RETURN; END; $$ LANGUAGE plpgsql; -- Function to update embedding updated_at timestamp CREATE OR REPLACE FUNCTION update_embedding_timestamp() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = CURRENT_TIMESTAMP; RETURN NEW; END; $$ LANGUAGE plpgsql; -- Trigger to automatically update embedding timestamp CREATE TRIGGER trigger_update_embedding_timestamp BEFORE UPDATE ON memory_embeddings FOR EACH ROW EXECUTE FUNCTION update_embedding_timestamp(); -- ============================================================================ -- Index Maintenance Functions -- ============================================================================ -- Function to rebuild vector indexes (useful after bulk inserts) CREATE OR REPLACE FUNCTION rebuild_vector_indexes() RETURNS TEXT AS $$ BEGIN REINDEX INDEX CONCURRENTLY idx_embeddings_episodic_vector; REINDEX INDEX CONCURRENTLY idx_embeddings_semantic_vector; REINDEX INDEX CONCURRENTLY idx_embeddings_procedural_vector; REINDEX INDEX CONCURRENTLY idx_embeddings_emotional_vector; REINDEX INDEX CONCURRENTLY idx_embeddings_reflective_vector; RETURN 'Vector indexes rebuilt successfully'; EXCEPTION WHEN OTHERS THEN RETURN 'Error rebuilding indexes: ' || SQLERRM; END; $$ LANGUAGE plpgsql; -- Function to get vector index statistics CREATE OR REPLACE FUNCTION get_vector_index_stats() RETURNS TABLE( index_name TEXT, index_size TEXT, tuples BIGINT ) AS $$ BEGIN RETURN QUERY SELECT i.indexrelname::TEXT AS index_name, pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size, s.n_tup_ins AS tuples FROM pg_stat_user_indexes s JOIN pg_index i ON s.indexrelid = i.indexrelid WHERE i.indexrelname LIKE 'idx_embeddings_%_vector' ORDER BY pg_relation_size(i.indexrelid) DESC; END; $$ LANGUAGE plpgsql; -- ============================================================================ -- Performance Optimization -- ============================================================================ -- Analyze tables for query optimization ANALYZE memories; ANALYZE memory_embeddings; ANALYZE memory_links; ANALYZE memory_metadata; -- ============================================================================ -- Completion Message -- ============================================================================ DO $$ BEGIN RAISE NOTICE 'pgvector extension configured successfully'; RAISE NOTICE 'Vector indexes created for all five memory sectors'; RAISE NOTICE 'Database is ready for HMD memory operations'; END $$;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server