Skip to main content
Glama

MCP AI Service Platform

by dkb12138ggg
init.sql6.71 kB
-- 初始化PostgreSQL数据库和RAG相关扩展 -- 启用pgvector扩展用于向量存储 CREATE EXTENSION IF NOT EXISTS vector; -- 启用全文搜索扩展 CREATE EXTENSION IF NOT EXISTS pg_trgm; -- 创建RAG相关表结构 -- 文档表 CREATE TABLE IF NOT EXISTS documents ( id SERIAL PRIMARY KEY, title VARCHAR(255) NOT NULL, content TEXT NOT NULL, metadata JSONB DEFAULT '{}', file_type VARCHAR(50), file_size INTEGER, file_hash VARCHAR(64) UNIQUE, source_url TEXT, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, indexed_at TIMESTAMP WITH TIME ZONE, -- 全文搜索索引 search_vector tsvector GENERATED ALWAYS AS ( to_tsvector('english', coalesce(title, '') || ' ' || coalesce(content, '')) ) STORED ); -- 文档块表(用于分块存储) CREATE TABLE IF NOT EXISTS document_chunks ( id SERIAL PRIMARY KEY, document_id INTEGER REFERENCES documents(id) ON DELETE CASCADE, chunk_index INTEGER NOT NULL, content TEXT NOT NULL, metadata JSONB DEFAULT '{}', -- 向量嵌入,使用1536维度(OpenAI embedding) embedding vector(1536), created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, -- 确保每个文档的块索引唯一 UNIQUE(document_id, chunk_index) ); -- 查询历史表 CREATE TABLE IF NOT EXISTS query_history ( id SERIAL PRIMARY KEY, query_text TEXT NOT NULL, query_embedding vector(1536), user_id VARCHAR(255), session_id VARCHAR(255), -- 查询结果 results JSONB, matched_chunks INTEGER[], similarity_scores FLOAT[], -- 性能指标 execution_time_ms INTEGER, total_chunks_searched INTEGER, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); -- 创建索引优化查询性能 -- 文档索引 CREATE INDEX IF NOT EXISTS idx_documents_file_hash ON documents(file_hash); CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at); CREATE INDEX IF NOT EXISTS idx_documents_search_vector ON documents USING GIN(search_vector); CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata); -- 文档块索引 CREATE INDEX IF NOT EXISTS idx_document_chunks_document_id ON document_chunks(document_id); CREATE INDEX IF NOT EXISTS idx_document_chunks_embedding ON document_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); -- 查询历史索引 CREATE INDEX IF NOT EXISTS idx_query_history_user_id ON query_history(user_id); CREATE INDEX IF NOT EXISTS idx_query_history_session_id ON query_history(session_id); CREATE INDEX IF NOT EXISTS idx_query_history_created_at ON query_history(created_at); CREATE INDEX IF NOT EXISTS idx_query_history_embedding ON query_history USING ivfflat (query_embedding vector_cosine_ops) WITH (lists = 100); -- 创建更新时间戳的触发器函数 CREATE OR REPLACE FUNCTION update_updated_at_column() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = CURRENT_TIMESTAMP; RETURN NEW; END; $$ language 'plpgsql'; -- 为documents表创建更新时间戳触发器 CREATE TRIGGER update_documents_updated_at BEFORE UPDATE ON documents FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); -- 创建一些有用的视图 -- 文档统计视图 CREATE OR REPLACE VIEW document_stats AS SELECT COUNT(*) as total_documents, COUNT(CASE WHEN indexed_at IS NOT NULL THEN 1 END) as indexed_documents, SUM(file_size) as total_size, AVG(file_size) as avg_size, COUNT(DISTINCT file_type) as unique_file_types FROM documents; -- 块统计视图 CREATE OR REPLACE VIEW chunk_stats AS SELECT d.id as document_id, d.title, COUNT(dc.id) as chunk_count, COUNT(CASE WHEN dc.embedding IS NOT NULL THEN 1 END) as embedded_chunks FROM documents d LEFT JOIN document_chunks dc ON d.id = dc.document_id GROUP BY d.id, d.title; -- 查询性能统计视图 CREATE OR REPLACE VIEW query_performance_stats AS SELECT DATE_TRUNC('hour', created_at) as hour, COUNT(*) as query_count, AVG(execution_time_ms) as avg_execution_time, MAX(execution_time_ms) as max_execution_time, AVG(total_chunks_searched) as avg_chunks_searched FROM query_history WHERE created_at >= NOW() - INTERVAL '24 hours' GROUP BY DATE_TRUNC('hour', created_at) ORDER BY hour DESC; -- 创建一些有用的函数 -- 向量相似度搜索函数 CREATE OR REPLACE FUNCTION search_similar_chunks( query_embedding vector(1536), similarity_threshold float DEFAULT 0.7, limit_count integer DEFAULT 10 ) RETURNS TABLE( chunk_id integer, document_id integer, document_title varchar, content text, similarity_score float ) AS $$ BEGIN RETURN QUERY SELECT dc.id, dc.document_id, d.title, dc.content, 1 - (dc.embedding <=> query_embedding) as similarity FROM document_chunks dc JOIN documents d ON dc.document_id = d.id WHERE dc.embedding IS NOT NULL AND 1 - (dc.embedding <=> query_embedding) >= similarity_threshold ORDER BY dc.embedding <=> query_embedding LIMIT limit_count; END; $$ LANGUAGE plpgsql; -- 全文搜索函数 CREATE OR REPLACE FUNCTION search_documents_fulltext( search_query text, limit_count integer DEFAULT 10 ) RETURNS TABLE( document_id integer, title varchar, content text, rank float ) AS $$ BEGIN RETURN QUERY SELECT d.id, d.title, d.content, ts_rank(d.search_vector, plainto_tsquery('english', search_query)) as rank FROM documents d WHERE d.search_vector @@ plainto_tsquery('english', search_query) ORDER BY rank DESC LIMIT limit_count; END; $$ LANGUAGE plpgsql; -- 插入一些示例数据(可选) INSERT INTO documents (title, content, file_type, file_size, file_hash, metadata) VALUES ('示例文档1', '这是一个关于机器学习的示例文档,包含了深度学习和神经网络的基础知识。', 'txt', 1024, 'hash1', '{"category": "AI", "tags": ["machine-learning", "deep-learning"]}'), ('示例文档2', '这是一个关于Web开发的示例文档,涵盖了前端和后端技术栈。', 'txt', 2048, 'hash2', '{"category": "Web", "tags": ["frontend", "backend"]}') ON CONFLICT (file_hash) DO NOTHING; -- 输出完成信息 DO $$ BEGIN RAISE NOTICE '数据库初始化完成!'; RAISE NOTICE '- 已创建pgvector扩展支持向量存储'; RAISE NOTICE '- 已创建documents、document_chunks、query_history表'; RAISE NOTICE '- 已创建相关索引和视图'; RAISE NOTICE '- 已创建搜索和统计函数'; END $$;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dkb12138ggg/python-rag-mcp-client'

If you have feedback or need assistance with the MCP directory API, please join our Discord server