rebuild_communities

Clear outdated communities at the specified level (macro or micro) and rebuild them, keeping the academic literature graph's community structure current.

Instructions

Rebuild communities.

Clears old communities at the specified level and reconstructs them.

Args:
    level: community level, "macro" or "micro"
    min_df: minimum document frequency for a node, default 3
    resolution: Leiden resolution parameter, default 1.0

Returns: list of new communities

Input Schema

Name        Required  Description                            Default
level       No        Community level: "macro" or "micro"    macro
min_df      No        Minimum document frequency for a node  3
resolution  No        Leiden resolution parameter            1.0
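
For orientation, here is a sketch of a call and the shape of a successful response. The argument values and the returned entities are invented for illustration; the field names follow the implementation below.

    result = rebuild_communities(level="macro", min_df=3, resolution=1.0)
    # result == {
    #     "communities": [
    #         {
    #             "comm_id": 17,
    #             "size": 42,
    #             "top_entities": [
    #                 {"entity_id": 905, "type": "Topic",
    #                  "canonical_name": "causal inference", "weight": 12.0},
    #             ],
    #         },
    #     ]
    # }
    # On failure, "communities" is empty and an "error" object carries a
    # code such as DEPENDENCY_ERROR, NOT_FOUND, or DB_CONN_ERROR.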

Implementation Reference

  • The core implementation of the rebuild_communities tool. It clears existing communities for the given level, constructs a weighted undirected graph from paper-entity relations (filtering nodes by min_df), applies Leiden clustering at the specified resolution, and persists the new communities and their members to the database. Returns a list of the new communities with their top entities. (The excerpt assumes module-level imports from the surrounding file, e.g. typing.Any and the get_db/query_all database helpers.)
    def rebuild_communities(
        level: str = "macro",
        min_df: int = 3,
        resolution: float = 1.0,
    ) -> dict[str, Any]:
        """重建社区
        
        清除指定层级的旧社区并重新构建。
        
        Args:
            level: 社区层级,"macro" 或 "micro"
            min_df: 节点最小文档频率,默认 3
            resolution: Leiden 分辨率参数,默认 1.0
            
        Returns:
            新社区列表
        """
        try:
            # Rather than delegating to build_communities_v1 with
            # rebuild=True, reimplement a simplified version of its logic
            # inline, reusing the shared helpers imported below.

            # igraph/leidenalg are optional native dependencies; fail with
            # a structured error rather than an ImportError traceback.
            try:
                import igraph as ig
                import leidenalg
            except ImportError:
                return {
                    "communities": [],
                    "error": {
                        "code": "DEPENDENCY_ERROR",
                        "message": "igraph and leidenalg are required"
                    }
                }
            
            from paperlib_mcp.tools.graph_community import (
                COMMUNITY_ENTITY_TYPES,
                get_edge_weight,
            )
            from collections import defaultdict
            import json
            
            with get_db() as conn:
                # Clear previous results for this level: members and summaries first, then the parent community rows
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        DELETE FROM community_members 
                        WHERE comm_id IN (SELECT comm_id FROM communities WHERE level = %s)
                        """,
                        (level,)
                    )
                    cur.execute(
                        """
                        DELETE FROM community_summaries 
                        WHERE comm_id IN (SELECT comm_id FROM communities WHERE level = %s)
                        """,
                        (level,)
                    )
                    cur.execute(
                        "DELETE FROM communities WHERE level = %s",
                        (level,)
                    )
                
                # 获取 Paper->Entity 关系
                relations = query_all(
                    """
                    SELECT 
                        p.entity_id AS paper_eid,
                        p.canonical_key AS doc_id,
                        x.entity_id AS node_eid,
                        x.type AS node_type,
                        x.canonical_name
                    FROM relations r
                    JOIN entities p ON p.entity_id = r.subj_entity_id AND p.type = 'Paper'
                    JOIN entities x ON x.entity_id = r.obj_entity_id
                    WHERE r.predicate IN (
                        'PAPER_HAS_TOPIC', 'PAPER_USES_MEASURE', 'PAPER_IDENTIFIES_WITH',
                        'PAPER_IN_SETTING', 'PAPER_USES_DATA'
                    )
                    AND x.type = ANY(%s)
                    """,
                    (COMMUNITY_ENTITY_TYPES,)
                )
                
                if not relations:
                    return {"communities": [], "error": {"code": "NOT_FOUND", "message": "No relations found"}}
                
                # Build the co-occurrence graph and cluster it (same approach
                # as build_communities_v1): paper_to_nodes maps each paper to
                # the set of entity nodes it mentions.
                paper_to_nodes: dict[str, set[int]] = defaultdict(set)
                node_info: dict[int, dict] = {}
                
                for r in relations:
                    paper_to_nodes[r["doc_id"]].add(r["node_eid"])
                    if r["node_eid"] not in node_info:
                        node_info[r["node_eid"]] = {
                            "entity_id": r["node_eid"],
                            "type": r["node_type"],
                            "canonical_name": r["canonical_name"],
                        }
                
                node_df: dict[int, int] = defaultdict(int)
                for doc_id, nodes in paper_to_nodes.items():
                    for node_id in nodes:
                        node_df[node_id] += 1
                
                # Keep only nodes that appear in at least min_df distinct papers.
                valid_nodes = {nid for nid, df in node_df.items() if df >= min_df}
                
                if not valid_nodes:
                    return {"communities": [], "error": {"code": "NOT_FOUND", "message": f"No nodes with df >= {min_df}"}}
                
                # Build edges: entities co-occurring in a paper are connected,
                # with per-pair weights determined by their types via get_edge_weight.
                edge_counts: dict[tuple[int, int], float] = defaultdict(float)
                for doc_id, nodes in paper_to_nodes.items():
                    valid_doc_nodes = [n for n in nodes if n in valid_nodes]
                    for i, n1 in enumerate(valid_doc_nodes):
                        for n2 in valid_doc_nodes[i+1:]:
                            key = (min(n1, n2), max(n1, n2))
                            weight = get_edge_weight(node_info[n1]["type"], node_info[n2]["type"])
                            edge_counts[key] += weight
                
                if not edge_counts:
                    return {"communities": [], "error": {"code": "NOT_FOUND", "message": "No edges found"}}
                
                # Build the igraph graph
                node_list = sorted(valid_nodes)
                node_to_idx = {nid: idx for idx, nid in enumerate(node_list)}
                
                edges = []
                weights = []
                for (n1, n2), w in edge_counts.items():
                    edges.append((node_to_idx[n1], node_to_idx[n2]))
                    weights.append(w)
                
                g = ig.Graph(n=len(node_list), edges=edges, directed=False)
                g.es["weight"] = weights
                
                # Leiden clustering (RB configuration model at the given resolution)
                partition = leidenalg.find_partition(
                    g, leidenalg.RBConfigurationVertexPartition,
                    weights="weight", resolution_parameter=resolution
                )
                
                # Persist new communities and their members to the database
                communities_result = []
                community_members_map: dict[int, list[tuple[int, float]]] = defaultdict(list)
                
                for node_idx, comm_idx in enumerate(partition.membership):
                    node_id = node_list[node_idx]
                    weight = float(node_df[node_id])
                    community_members_map[comm_idx].append((node_id, weight))
                
                with conn.cursor() as cur:
                    for comm_idx, members in community_members_map.items():
                        if len(members) < 2:
                            continue
                        
                        cur.execute(
                            """
                            INSERT INTO communities(level, method, params)
                            VALUES (%s, 'leiden', %s::jsonb)
                            RETURNING comm_id
                            """,
                            (level, json.dumps({"resolution": resolution, "min_df": min_df}))
                        )
                        result = cur.fetchone()
                        comm_id = result["comm_id"]
                        
                        for node_id, weight in members:
                            cur.execute(
                                """
                                INSERT INTO community_members(comm_id, entity_id, role, weight)
                                VALUES (%s, %s, 'member', %s)
                                """,
                                (comm_id, node_id, weight)
                            )
                        
                        # Representative entities: members ranked by weight
                        # (document frequency), top 20 kept.
                        sorted_members = sorted(members, key=lambda x: x[1], reverse=True)
                        top_entities = []
                        for node_id, weight in sorted_members[:20]:
                            info = node_info.get(node_id, {})
                            top_entities.append({
                                "entity_id": node_id,
                                "type": info.get("type", ""),
                                "canonical_name": info.get("canonical_name", ""),
                                "weight": weight,
                            })
                        
                        communities_result.append({
                            "comm_id": comm_id,
                            "size": len(members),
                            "top_entities": top_entities,
                        })
                
                communities_result.sort(key=lambda x: x["size"], reverse=True)
                
                return {"communities": communities_result}
                
        except Exception as e:
            return {"communities": [], "error": {"code": "DB_CONN_ERROR", "message": str(e)}}
  • The call that registers all graph maintenance tools (including rebuild_communities) with the main FastMCP server instance.
    register_graph_maintenance_tools(mcp)
  • Function that defines and registers the graph maintenance tools using @mcp.tool() decorators when called with an MCP instance.
    def register_graph_maintenance_tools(mcp: FastMCP) -> None:
        """注册 GraphRAG 维护工具"""
