summarize_community_v1

Generate structured summaries for academic communities by analyzing literature, identifying key insights, and presenting findings in a specified format.

Instructions

Generate a structured summary for a community.

Input Schema

Name        Required  Description                                                        Default
comm_id     Yes       ID of the community to summarize                                   -
pack_id     No        Existing evidence pack to reuse; a new pack is built if omitted    None
llm_model   No        Model override; falls back to llm_summarize_model, then llm_model  None
max_chunks  No        Maximum number of evidence chunks to include                       100
style       No        Summary style; "econ_finance" is the only defined value            econ_finance
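
For orientation, here is a minimal sketch of invoking the tool with the official MCP Python SDK over stdio. The launch command and argument values are illustrative, not taken from this server's documentation:

    import asyncio

    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    async def main() -> None:
        # The launch command is hypothetical; adjust to however paperlib-mcp is started.
        params = StdioServerParameters(command="python", args=["-m", "paperlib_mcp"])
        async with stdio_client(params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                result = await session.call_tool(
                    "summarize_community_v1",
                    {"comm_id": 42, "max_chunks": 50},  # illustrative values
                )
                print(result.content)

    asyncio.run(main())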

Implementation Reference

  • Core handler that executes the tool logic: it validates the community, builds an evidence pack from the top entities' mentions and chunks (unless an existing pack_id is supplied), formats the evidence into a prompt, calls the LLM asynchronously via OpenRouter to obtain a structured JSON summary, renders Markdown, saves the summary to the database, and returns the structured output.
    async def summarize_community_v1_run(
        comm_id: int,
        pack_id: int | None = None,
        llm_model: str | None = None,
        max_chunks: int = 100,
        style: str = "econ_finance",
    ) -> dict[str, Any]:
        """生成社区结构化摘要 (Core Implementation)"""
        try:
            settings = get_settings()
        # Prefer llm_summarize_model; if unset, fall back to llm_model
            actual_llm_model = llm_model or settings.llm_summarize_model or settings.llm_model
        # Verify that the community exists
            community = query_one(
                "SELECT comm_id, level FROM communities WHERE comm_id = %s",
                (comm_id,)
            )
            
            if not community:
                return SummarizeCommunityOut(
                    comm_id=comm_id,
                    pack_id=0,
                    summary_json={},
                    markdown="",
                    error=MCPErrorModel(code="NOT_FOUND", message=f"Community {comm_id} not found"),
                ).model_dump()
            
        # Fetch or create the evidence pack
            actual_pack_id = pack_id
            if not actual_pack_id:
            # Import and call build_community_evidence_pack.
            # Note: avoid circular imports here; if graph_community depended on
            # graph_summarize this could be a problem, but only the tool
            # registration is referenced, so it should be fine (alternatively,
            # the query logic could be reused directly). For simplicity, the
            # original logic is kept.
                
                members = query_all(
                    """
                    SELECT entity_id, weight
                    FROM community_members
                    WHERE comm_id = %s
                    ORDER BY weight DESC
                    """,
                    (comm_id,)
                )
                
                if not members:
                    return SummarizeCommunityOut(
                        comm_id=comm_id,
                        pack_id=0,
                        summary_json={},
                        markdown="",
                        error=MCPErrorModel(code="NOT_FOUND", message="No members in community"),
                    ).model_dump()
                
                entity_ids = [m["entity_id"] for m in members]
                
            # Fetch mentions -> chunks
                mentions = query_all(
                    """
                    SELECT m.doc_id, m.chunk_id, MAX(m.confidence) AS conf
                    FROM mentions m
                    WHERE m.entity_id = ANY(%s)
                    GROUP BY m.doc_id, m.chunk_id
                    ORDER BY conf DESC
                    LIMIT 5000
                    """,
                    (entity_ids,)
                )
                
            # Apply the per-document limit (per_doc_limit)
                doc_counts: dict[str, int] = defaultdict(int)
                selected_chunks: list[tuple[str, int]] = []
                per_doc_limit = 4
                
                for m in mentions:
                    if doc_counts[m["doc_id"]] < per_doc_limit:
                        selected_chunks.append((m["doc_id"], m["chunk_id"]))
                        doc_counts[m["doc_id"]] += 1
                        if len(selected_chunks) >= max_chunks:
                            break
                
                if not selected_chunks:
                    return SummarizeCommunityOut(
                        comm_id=comm_id,
                        pack_id=0,
                        summary_json={},
                        markdown="",
                        error=MCPErrorModel(code="NOT_FOUND", message="No chunks found for community"),
                    ).model_dump()
                
            # Create the evidence pack
                with get_db() as conn:
                    with conn.cursor() as cur:
                        cur.execute(
                            """
                            INSERT INTO evidence_packs(query, params_json)
                            VALUES (%s, %s::jsonb)
                            RETURNING pack_id
                            """,
                            (
                                f"Community {comm_id} summary",
                                json.dumps({"comm_id": comm_id, "for": "summary"})
                            )
                        )
                        result = cur.fetchone()
                        actual_pack_id = result["pack_id"]
                        
                        for rank, (doc_id, chunk_id) in enumerate(selected_chunks):
                            cur.execute(
                                """
                                INSERT INTO evidence_pack_items(pack_id, doc_id, chunk_id, rank)
                                VALUES (%s, %s, %s, %s)
                                ON CONFLICT DO NOTHING
                                """,
                                (actual_pack_id, doc_id, chunk_id, rank)
                            )
            
        # Fetch the evidence pack contents
            evidence_items = query_all(
                """
                SELECT 
                    i.doc_id,
                    i.chunk_id,
                    c.page_start,
                    c.page_end,
                    c.text,
                    d.title,
                    d.authors,
                    d.year
                FROM evidence_pack_items i
                JOIN chunks c ON c.chunk_id = i.chunk_id
                JOIN documents d ON d.doc_id = i.doc_id
                WHERE i.pack_id = %s
                ORDER BY i.rank
                """,
                (actual_pack_id,)
            )
            
            if not evidence_items:
                return SummarizeCommunityOut(
                    comm_id=comm_id,
                    pack_id=actual_pack_id,
                    summary_json={},
                    markdown="",
                    error=MCPErrorModel(code="NOT_FOUND", message="Evidence pack is empty"),
                ).model_dump()
            
        # Fetch the top entities
            top_entities = query_all(
                """
                SELECT e.canonical_name, e.type, cm.weight
                FROM community_members cm
                JOIN entities e ON e.entity_id = cm.entity_id
                WHERE cm.comm_id = %s
                ORDER BY cm.weight DESC
                LIMIT 20
                """,
                (comm_id,)
            )
            top_entities_str = ", ".join([
                f"{e['canonical_name']} ({e['type']})"
                for e in top_entities
            ])
            
        # Format the evidence for the prompt
            evidence_text = format_evidence_for_prompt(evidence_items)
            
        # Call the LLM (async)
            summary_json = await acall_llm_summarize(top_entities_str, evidence_text, actual_llm_model)
            
            if not summary_json:
                return SummarizeCommunityOut(
                    comm_id=comm_id,
                    pack_id=actual_pack_id,
                    summary_json={},
                    markdown="",
                    error=MCPErrorModel(code="LLM_ERROR", message="Failed to generate summary"),
                ).model_dump()
            
        # Render the Markdown summary
            markdown = summary_to_markdown(summary_json, comm_id)
            
        # Save the summary to the database
            with get_db() as conn:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO community_summaries(comm_id, summary_json)
                        VALUES (%s, %s::jsonb)
                        ON CONFLICT (comm_id) DO UPDATE
                        SET summary_json = EXCLUDED.summary_json, updated_at = now()
                        """,
                        (comm_id, json.dumps(summary_json))
                    )
            
            return SummarizeCommunityOut(
                comm_id=comm_id,
                pack_id=actual_pack_id,
                summary_json=summary_json,
                markdown=markdown,
            ).model_dump()
            
        except Exception as e:
            import traceback
            traceback.print_exc()
            return SummarizeCommunityOut(
                comm_id=comm_id,
                pack_id=pack_id or 0,
                summary_json={},
                markdown="",
                error=MCPErrorModel(code="LLM_ERROR", message=str(e)),
            ).model_dump()
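
    The handler relies on two helpers that are not reproduced in this reference: format_evidence_for_prompt and summary_to_markdown. The following is a minimal sketch of what the evidence formatter plausibly looks like, assuming it renders each chunk as a numbered, citable block using the columns selected above (title, year, doc_id, chunk_id, page range, text); the actual implementation may differ:

    from typing import Any

    def format_evidence_for_prompt(items: list[dict[str, Any]]) -> str:
        """Hypothetical sketch: render evidence chunks as numbered, citable blocks."""
        blocks: list[str] = []
        for i, item in enumerate(items, start=1):
            header = (
                f"[{i}] {item['title']} ({item.get('year') or 'n.d.'}) "
                f"(doc {item['doc_id']}, chunk {item['chunk_id']}, "
                f"pp. {item['page_start']}-{item['page_end']})"
            )
            blocks.append(f"{header}\n{item['text']}")
        return "\n\n".join(blocks)
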
  • MCP tool registration decorator @mcp.tool() defining the entrypoint summarize_community_v1, which delegates to the core run function.
    @mcp.tool()
    async def summarize_community_v1(
        comm_id: int,
        pack_id: int | None = None,
        llm_model: str | None = None,
        max_chunks: int = 100,
        style: str = "econ_finance",
    ) -> dict[str, Any]:
        """生成社区结构化摘要"""
        return await summarize_community_v1_run(
            comm_id=comm_id,
            pack_id=pack_id,
            llm_model=llm_model,
            max_chunks=max_chunks,
            style=style
        )
  • Pydantic input (SummarizeCommunityIn) and output (SummarizeCommunityOut) models used for schema validation and type hints in the tool implementation.
    class SummarizeCommunityIn(BaseModel):
        """summarize_community_v1 输入"""
        comm_id: int
        pack_id: Optional[int] = None
        llm_model: Optional[str] = None  # Defaults to the LLM_MODEL environment variable
        max_chunks: int = 100
        style: Literal["econ_finance"] = "econ_finance"
    
    
    class SummarizeCommunityOut(BaseModel):
        """summarize_community_v1 输出"""
        comm_id: int
        pack_id: int
        summary_json: dict[str, Any]
        markdown: str
        error: Optional[MCPErrorModel] = None
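
    These are Pydantic v2 models (the handler calls model_dump()), so arguments can be validated up front. A small usage sketch with illustrative values:

    # Raises pydantic.ValidationError for a wrong type or an unsupported style,
    # since style is Literal["econ_finance"].
    args = SummarizeCommunityIn.model_validate({"comm_id": 42, "max_chunks": 50})
    print(args.style)  # "econ_finance" (the default)
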
  • Invocation of the register_graph_summarize_tools function in the main MCP server setup, which registers the summarize_community_v1 tool.
    register_graph_summarize_tools(mcp)
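
    For context, a typical server setup around this call might look like the sketch below. The FastMCP import and server name are assumptions based on the @mcp.tool() decorator used above; only the register_graph_summarize_tools(mcp) line is from the source:

    from mcp.server.fastmcp import FastMCP

    mcp = FastMCP("paperlib-mcp")  # server name is illustrative
    register_graph_summarize_tools(mcp)

    if __name__ == "__main__":
        mcp.run()  # defaults to the stdio transport
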
  • Helper that asynchronously calls the LLM (via OpenRouter) with the community-evidence prompt to generate the structured JSON summary.
    async def acall_llm_summarize(top_entities: str, evidence_text: str, llm_model: str) -> dict | None:
        """调用 LLM 生成社区摘要 (Async)"""
        settings = get_settings()
        
        if not settings.openrouter_api_key:
            return None
        
        url = f"{settings.openrouter_base_url}/chat/completions"
        headers = {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
        }
        
        payload = {
            "model": llm_model,
            "messages": [
                {"role": "system", "content": COMMUNITY_SUMMARY_SYSTEM_PROMPT},
                {"role": "user", "content": COMMUNITY_SUMMARY_USER_PROMPT_TEMPLATE.format(
                    top_entities=top_entities,
                    evidence_text=evidence_text,
                )},
            ],
            "temperature": 0.2,
            "response_format": {"type": "json_object"},
            # OpenRouter format: disable reasoning tokens for reasoning models (e.g. GPT-5 Nano).
            # effort "low" minimizes reasoning; exclude=True omits reasoning content from the response.
            "reasoning": {
                "effort": "low",
                "exclude": True,
            },
        }
        
        try:
            async with httpx.AsyncClient(timeout=180.0) as client:
                response = await client.post(url, json=payload, headers=headers)
                response.raise_for_status()
                data = response.json()

            content = data["choices"][0]["message"]["content"]
            return json.loads(content)
        
        except Exception as e:
            print(f"LLM summarize error: {e}")
            return None
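
    Note the failure semantics: any transport error, non-2xx response, or unparseable completion falls into the except branch and returns None, which summarize_community_v1_run maps to an LLM_ERROR result. The response_format of {"type": "json_object"} requests JSON output, but json.loads can still fail if the model deviates, which is why the broad catch is kept.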
