rebuild_communities
Clear outdated communities and reconstruct new ones at specified levels (macro or micro) to maintain organized academic literature networks.
Instructions
重建社区
清除指定层级的旧社区并重新构建。
Args: level: 社区层级,"macro" 或 "micro" min_df: 节点最小文档频率,默认 3 resolution: Leiden 分辨率参数,默认 1.0
Returns: 新社区列表
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
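| level | No | Community level, "macro" or "micro" | macro |
| min_df | No | Minimum document frequency for a node | 3 |
| resolution | No | Leiden resolution parameter | 1.0 |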
Implementation Reference
# The core implementation of the rebuild_communities tool. It clears existing
# communities for the given level, constructs a weighted undirected graph from
# paper-entity relations (filtering nodes by min_df), applies Leiden clustering
# with specified resolution, and persists the new communities and their members
# to the database. Returns list of new communities with top entities.
def rebuild_communities(
    level: str = "macro",
    min_df: int = 3,
    resolution: float = 1.0,
) -> dict[str, Any]:
    """Rebuild the communities of one level from scratch.

    Deletes the stored communities (plus their members and summaries) for
    ``level``, then rebuilds them from the Paper -> entity relations: entities
    that co-occur in the same paper are linked by a weighted undirected edge,
    and the resulting graph is partitioned with the Leiden algorithm.

    Args:
        level: Community level, "macro" or "micro". The value is used as-is in
            the SQL filters and inserts; it is not validated here.
        min_df: Minimum number of distinct papers an entity must appear in to
            be kept as a graph node. Defaults to 3.
        resolution: Leiden resolution parameter. Defaults to 1.0.

    Returns:
        On success, ``{"communities": [...]}`` sorted by size (largest first);
        each entry has ``comm_id``, ``size`` and ``top_entities`` (at most 20
        members, ordered by document frequency).
        On failure, ``{"communities": [], "error": {"code", "message"}}``.
        Nothing is raised: every exception is caught and reported.
    """
    try:
        try:
            import igraph as ig
            import leidenalg
        except ImportError:
            return {
                "communities": [],
                "error": {
                    "code": "DEPENDENCY_ERROR",
                    "message": "igraph and leidenalg are required"
                }
            }

        import json
        from collections import Counter, defaultdict
        from itertools import combinations

        from paperlib_mcp.tools.graph_community import (
            COMMUNITY_ENTITY_TYPES,
            get_edge_weight,
        )

        with get_db() as conn:
            # Clear the previous results for this level. Dependent rows go
            # first, the communities themselves last.
            # NOTE(review): this runs before the input data is validated, so
            # the early NOT_FOUND returns below leave the level without any
            # communities - presumably committed when the `with` block exits;
            # confirm against get_db().
            with conn.cursor() as cur:
                cur.execute(
                    """
                    DELETE FROM community_members
                    WHERE comm_id IN (SELECT comm_id FROM communities WHERE level = %s)
                    """,
                    (level,)
                )
                cur.execute(
                    """
                    DELETE FROM community_summaries
                    WHERE comm_id IN (SELECT comm_id FROM communities WHERE level = %s)
                    """,
                    (level,)
                )
                cur.execute(
                    "DELETE FROM communities WHERE level = %s",
                    (level,)
                )

            # Fetch the Paper -> entity relations.
            relations = query_all(
                """
                SELECT
                    p.entity_id AS paper_eid,
                    p.canonical_key AS doc_id,
                    x.entity_id AS node_eid,
                    x.type AS node_type,
                    x.canonical_name
                FROM relations r
                JOIN entities p ON p.entity_id = r.subj_entity_id AND p.type = 'Paper'
                JOIN entities x ON x.entity_id = r.obj_entity_id
                WHERE r.predicate IN (
                    'PAPER_HAS_TOPIC', 'PAPER_USES_MEASURE', 'PAPER_IDENTIFIES_WITH',
                    'PAPER_IN_SETTING', 'PAPER_USES_DATA'
                )
                AND x.type = ANY(%s)
                """,
                (COMMUNITY_ENTITY_TYPES,)
            )

            if not relations:
                return {"communities": [], "error": {"code": "NOT_FOUND", "message": "No relations found"}}

            # Group entity nodes by paper and remember each node's metadata
            # (the first row seen for a node wins).
            paper_to_nodes: dict[str, set[int]] = defaultdict(set)
            node_info: dict[int, dict] = {}
            for row in relations:
                node_eid = row["node_eid"]
                paper_to_nodes[row["doc_id"]].add(node_eid)
                if node_eid not in node_info:
                    node_info[node_eid] = {
                        "entity_id": node_eid,
                        "type": row["node_type"],
                        "canonical_name": row["canonical_name"],
                    }

            # Document frequency: the number of distinct papers mentioning a
            # node. Each paper holds a set, so a node counts once per paper.
            node_df: Counter = Counter(
                node_id
                for nodes in paper_to_nodes.values()
                for node_id in nodes
            )

            valid_nodes = {nid for nid, df in node_df.items() if df >= min_df}
            if not valid_nodes:
                return {"communities": [], "error": {"code": "NOT_FOUND", "message": f"No nodes with df >= {min_df}"}}

            # Build edges: every pair of valid nodes sharing a paper adds the
            # type-dependent weight to their undirected edge.
            edge_counts: dict[tuple[int, int], float] = defaultdict(float)
            for nodes in paper_to_nodes.values():
                valid_doc_nodes = [n for n in nodes if n in valid_nodes]
                for n1, n2 in combinations(valid_doc_nodes, 2):
                    # Normalise the key so (a, b) and (b, a) are the same edge.
                    key = (min(n1, n2), max(n1, n2))
                    edge_counts[key] += get_edge_weight(
                        node_info[n1]["type"], node_info[n2]["type"]
                    )

            if not edge_counts:
                return {"communities": [], "error": {"code": "NOT_FOUND", "message": "No edges found"}}

            # Build the igraph graph; vertices are indexed by position in the
            # sorted node list.
            node_list = sorted(valid_nodes)
            node_to_idx = {nid: idx for idx, nid in enumerate(node_list)}

            edges = []
            weights = []
            for (n1, n2), w in edge_counts.items():
                edges.append((node_to_idx[n1], node_to_idx[n2]))
                weights.append(w)

            g = ig.Graph(n=len(node_list), edges=edges, directed=False)
            g.es["weight"] = weights

            # Leiden clustering.
            partition = leidenalg.find_partition(
                g,
                leidenalg.RBConfigurationVertexPartition,
                weights="weight",
                resolution_parameter=resolution
            )

            # Group nodes by community; a member's weight is its document
            # frequency.
            communities_result = []
            community_members_map: dict[int, list[tuple[int, float]]] = defaultdict(list)
            for node_idx, comm_idx in enumerate(partition.membership):
                node_id = node_list[node_idx]
                community_members_map[comm_idx].append((node_id, float(node_df[node_id])))

            # Persist the new communities and their members.
            params_json = json.dumps({"resolution": resolution, "min_df": min_df})
            with conn.cursor() as cur:
                for members in community_members_map.values():
                    # Single-node communities are not stored.
                    if len(members) < 2:
                        continue

                    cur.execute(
                        """
                        INSERT INTO communities(level, method, params)
                        VALUES (%s, 'leiden', %s::jsonb)
                        RETURNING comm_id
                        """,
                        (level, params_json)
                    )
                    # Rows are accessed by column name - assumes the cursor
                    # returns mapping-style rows; confirm against get_db().
                    comm_id = cur.fetchone()["comm_id"]

                    for node_id, weight in members:
                        cur.execute(
                            """
                            INSERT INTO community_members(comm_id, entity_id, role, weight)
                            VALUES (%s, %s, 'member', %s)
                            """,
                            (comm_id, node_id, weight)
                        )

                    # Report the 20 heaviest members of the community.
                    sorted_members = sorted(members, key=lambda m: m[1], reverse=True)
                    top_entities = []
                    for node_id, weight in sorted_members[:20]:
                        info = node_info.get(node_id, {})
                        top_entities.append({
                            "entity_id": node_id,
                            "type": info.get("type", ""),
                            "canonical_name": info.get("canonical_name", ""),
                            "weight": weight,
                        })

                    communities_result.append({
                        "comm_id": comm_id,
                        "size": len(members),
                        "top_entities": top_entities,
                    })

            communities_result.sort(key=lambda c: c["size"], reverse=True)

            return {"communities": communities_result}

    except Exception as e:
        # NOTE(review): every failure, not only connection problems, is
        # reported under the DB_CONN_ERROR code; kept unchanged in case
        # callers match on it.
        return {"communities": [], "error": {"code": "DB_CONN_ERROR", "message": str(e)}}
- src/paperlib_mcp/server.py:44-44 (registration): The call that registers all graph maintenance tools (including rebuild_communities) with the main FastMCP server instance: `register_graph_maintenance_tools(mcp)`
- src/paperlib_mcp/tools/graph_maintenance.py:23-24 (registration): Function that defines and registers the graph maintenance tools using @mcp.tool() decorators when called with an MCP instance: `def register_graph_maintenance_tools(mcp: FastMCP) -> None: """注册 GraphRAG 维护工具"""`