Skip to main content
Glama
003_m2_graphrag.sql8.74 kB
-- M2 Migration: GraphRAG v1 表结构 -- 运行方式: psql -h localhost -U paper -d paperlib -f 003_m2_graphrag.sql -- ============================================================ -- 实体表 (entities) -- ============================================================ CREATE TABLE IF NOT EXISTS entities ( entity_id BIGSERIAL PRIMARY KEY, type TEXT NOT NULL, -- Paper/Topic/MeasureProxy/IdentificationStrategy/Method/Setting/DataSource/Mechanism/LimitationGap canonical_name TEXT NOT NULL, -- 规范名称 canonical_key TEXT NOT NULL, -- 规范键(用于去重) normalized JSONB DEFAULT '{}'::jsonb, -- 归一化元数据 confidence FLOAT DEFAULT 1.0, -- 置信度 is_locked BOOLEAN DEFAULT FALSE, -- 是否锁定(防止自动合并) created_at TIMESTAMPTZ DEFAULT now(), updated_at TIMESTAMPTZ DEFAULT now(), UNIQUE (type, canonical_key) ); CREATE INDEX IF NOT EXISTS entities_type_idx ON entities(type); CREATE INDEX IF NOT EXISTS entities_canonical_key_idx ON entities(canonical_key); CREATE INDEX IF NOT EXISTS entities_is_locked_idx ON entities(is_locked) WHERE is_locked = TRUE; -- ============================================================ -- 实体别名表 (entity_aliases) -- ============================================================ CREATE TABLE IF NOT EXISTS entity_aliases ( alias_id BIGSERIAL PRIMARY KEY, entity_id BIGINT NOT NULL REFERENCES entities(entity_id) ON DELETE CASCADE, alias TEXT NOT NULL, -- 原始别名 alias_norm TEXT NOT NULL, -- 归一化别名 alias_type TEXT DEFAULT 'extracted', -- extracted/manual/synonym created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS entity_aliases_entity_idx ON entity_aliases(entity_id); CREATE INDEX IF NOT EXISTS entity_aliases_norm_idx ON entity_aliases(alias_norm); CREATE UNIQUE INDEX IF NOT EXISTS entity_aliases_unique_idx ON entity_aliases(entity_id, alias_norm); -- ============================================================ -- 实体提及表 (mentions) - 证据追溯核心 -- ============================================================ CREATE TABLE IF NOT EXISTS mentions ( mention_id BIGSERIAL PRIMARY KEY, entity_id BIGINT NOT NULL REFERENCES entities(entity_id) ON DELETE CASCADE, doc_id TEXT NOT NULL REFERENCES documents(doc_id) ON DELETE CASCADE, chunk_id BIGINT NOT NULL REFERENCES chunks(chunk_id) ON DELETE CASCADE, page_start INT, page_end INT, span_start INT, -- 在 chunk 文本中的起始位置 span_end INT, -- 在 chunk 文本中的结束位置 quote TEXT, -- 原文引用 confidence FLOAT DEFAULT 1.0, created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS mentions_entity_idx ON mentions(entity_id); CREATE INDEX IF NOT EXISTS mentions_doc_idx ON mentions(doc_id); CREATE INDEX IF NOT EXISTS mentions_chunk_idx ON mentions(chunk_id); -- ============================================================ -- 关系表 (relations) -- ============================================================ CREATE TABLE IF NOT EXISTS relations ( rel_id BIGSERIAL PRIMARY KEY, subj_entity_id BIGINT NOT NULL REFERENCES entities(entity_id) ON DELETE CASCADE, predicate TEXT NOT NULL, -- PAPER_HAS_TOPIC/PAPER_USES_MEASURE/PAPER_IDENTIFIES_WITH/... obj_entity_id BIGINT NOT NULL REFERENCES entities(entity_id) ON DELETE CASCADE, qualifiers JSONB DEFAULT '{}'::jsonb, -- 额外限定条件 confidence FLOAT DEFAULT 1.0, evidence JSONB DEFAULT '{}'::jsonb, -- {doc_id, chunk_id, quote, page_start, page_end} relation_hash TEXT UNIQUE, -- 用于去重 created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS relations_subj_idx ON relations(subj_entity_id); CREATE INDEX IF NOT EXISTS relations_obj_idx ON relations(obj_entity_id); CREATE INDEX IF NOT EXISTS relations_predicate_idx ON relations(predicate); -- ============================================================ -- 结论/声明表 (claims) -- ============================================================ CREATE TABLE IF NOT EXISTS claims ( claim_id BIGSERIAL PRIMARY KEY, doc_id TEXT NOT NULL REFERENCES documents(doc_id) ON DELETE CASCADE, chunk_id BIGINT NOT NULL REFERENCES chunks(chunk_id) ON DELETE CASCADE, claim_text TEXT NOT NULL, -- 结论原文 sign TEXT, -- positive/negative/mixed/null effect_size_text TEXT, -- 效应量描述 conditions JSONB DEFAULT '{}'::jsonb, -- 适用条件 confidence FLOAT DEFAULT 1.0, evidence JSONB DEFAULT '{}'::jsonb, -- 额外证据信息 claim_hash TEXT UNIQUE, -- 用于去重 created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS claims_doc_idx ON claims(doc_id); CREATE INDEX IF NOT EXISTS claims_chunk_idx ON claims(chunk_id); CREATE INDEX IF NOT EXISTS claims_sign_idx ON claims(sign); -- ============================================================ -- 社区表 (communities) -- ============================================================ CREATE TABLE IF NOT EXISTS communities ( comm_id BIGSERIAL PRIMARY KEY, level TEXT NOT NULL DEFAULT 'macro', -- macro/micro method TEXT NOT NULL DEFAULT 'leiden', -- leiden/louvain/... params JSONB DEFAULT '{}'::jsonb, -- 聚类参数 {resolution, min_df, ...} created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS communities_level_idx ON communities(level); -- ============================================================ -- 社区成员表 (community_members) -- ============================================================ CREATE TABLE IF NOT EXISTS community_members ( id BIGSERIAL PRIMARY KEY, comm_id BIGINT NOT NULL REFERENCES communities(comm_id) ON DELETE CASCADE, entity_id BIGINT NOT NULL REFERENCES entities(entity_id) ON DELETE CASCADE, role TEXT DEFAULT 'member', -- member/hub/... weight FLOAT DEFAULT 1.0, -- 成员权重 created_at TIMESTAMPTZ DEFAULT now(), UNIQUE (comm_id, entity_id) ); CREATE INDEX IF NOT EXISTS community_members_comm_idx ON community_members(comm_id); CREATE INDEX IF NOT EXISTS community_members_entity_idx ON community_members(entity_id); -- ============================================================ -- 社区摘要表 (community_summaries) -- ============================================================ CREATE TABLE IF NOT EXISTS community_summaries ( id BIGSERIAL PRIMARY KEY, comm_id BIGINT NOT NULL REFERENCES communities(comm_id) ON DELETE CASCADE UNIQUE, summary_json JSONB NOT NULL, -- {scope, measures, ids, consensus, debates, gaps, entry_points, ...} updated_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS community_summaries_comm_idx ON community_summaries(comm_id); -- ============================================================ -- 实体合并日志表 (entity_merge_log) -- ============================================================ CREATE TABLE IF NOT EXISTS entity_merge_log ( id BIGSERIAL PRIMARY KEY, from_entity_id BIGINT NOT NULL, -- 被合并的实体(已删除) to_entity_id BIGINT NOT NULL, -- 目标实体 reason TEXT, -- 合并原因 merged_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS entity_merge_log_to_idx ON entity_merge_log(to_entity_id); -- ============================================================ -- 验证表已创建 -- ============================================================ SELECT 'entities' as table_name, COUNT(*) as row_count FROM entities UNION ALL SELECT 'entity_aliases', COUNT(*) FROM entity_aliases UNION ALL SELECT 'mentions', COUNT(*) FROM mentions UNION ALL SELECT 'relations', COUNT(*) FROM relations UNION ALL SELECT 'claims', COUNT(*) FROM claims UNION ALL SELECT 'communities', COUNT(*) FROM communities UNION ALL SELECT 'community_members', COUNT(*) FROM community_members UNION ALL SELECT 'community_summaries', COUNT(*) FROM community_summaries UNION ALL SELECT 'entity_merge_log', COUNT(*) FROM entity_merge_log;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/h-lu/paperlib-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server