Skip to main content
Glama

summarize_community_v1

Generate structured summaries for academic communities by analyzing literature, identifying key insights, and presenting findings in a specified format.

Instructions

生成社区结构化摘要

Input Schema

Table / JSON Schema

| Name       | Required | Description | Default      |
|------------|----------|-------------|--------------|
| comm_id    | Yes      |             |              |
| pack_id    | No       |             |              |
| llm_model  | No       |             |              |
| max_chunks | No       |             |              |
| style      | No       |             | econ_finance |

Implementation Reference

  • Core handler function that executes the tool logic: validates community, builds evidence pack from top entities' mentions/chunks, formats evidence for prompt, calls LLM asynchronously via OpenRouter for structured JSON summary, generates Markdown, saves summary to DB, returns structured output.
async def summarize_community_v1_run(
    comm_id: int,
    pack_id: int | None = None,
    llm_model: str | None = None,
    max_chunks: int = 100,
    style: str = "econ_finance",
) -> dict[str, Any]:
    """Generate a structured community summary (core implementation).

    Pipeline: validate the community, obtain or build an evidence pack from
    the top member entities' mentions/chunks, format the evidence into a
    prompt, call the LLM asynchronously for a structured JSON summary, render
    Markdown, upsert the summary into the database, and return the result.

    Args:
        comm_id: Community to summarize; must exist in the communities table.
        pack_id: Existing evidence pack to reuse; when falsy, a new pack is built.
        llm_model: Explicit model override; falls back to settings when None.
        max_chunks: Cap on the number of chunks placed into a newly built pack.
        style: Summary style tag; note it is accepted for schema compatibility
            but not referenced anywhere in this body.

    Returns:
        A SummarizeCommunityOut serialized via model_dump(); on failure the
        `error` field carries an MCPErrorModel and the payload fields are empty.
    """
    try:
        settings = get_settings()
        # Prefer llm_summarize_model; when unset, fall back to the generic llm_model.
        actual_llm_model = llm_model or settings.llm_summarize_model or settings.llm_model

        # Verify the community exists.
        community = query_one(
            "SELECT comm_id, level FROM communities WHERE comm_id = %s", (comm_id,)
        )
        if not community:
            return SummarizeCommunityOut(
                comm_id=comm_id,
                pack_id=0,
                summary_json={},
                markdown="",
                error=MCPErrorModel(code="NOT_FOUND", message=f"Community {comm_id} not found"),
            ).model_dump()

        # Obtain or create the evidence pack.
        actual_pack_id = pack_id
        if not actual_pack_id:
            # Importing and calling build_community_evidence_pack was considered.
            # NOTE: that risks a circular import if graph_community ever depends
            # on graph_summarize; referencing only tool registration should be
            # safe, or the query logic could be reused directly.
            # For simplicity, keep the original inline logic.
            members = query_all(
                """
                SELECT entity_id, weight
                FROM community_members
                WHERE comm_id = %s
                ORDER BY weight DESC
                """,
                (comm_id,)
            )
            if not members:
                return SummarizeCommunityOut(
                    comm_id=comm_id,
                    pack_id=0,
                    summary_json={},
                    markdown="",
                    error=MCPErrorModel(code="NOT_FOUND", message="No members in community"),
                ).model_dump()

            entity_ids = [m["entity_id"] for m in members]

            # Map mentions -> chunks: keep the best confidence per (doc, chunk),
            # capped at 5000 candidate rows before per-document limiting.
            mentions = query_all(
                """
                SELECT m.doc_id, m.chunk_id, MAX(m.confidence) AS conf
                FROM mentions m
                WHERE m.entity_id = ANY(%s)
                GROUP BY m.doc_id, m.chunk_id
                ORDER BY conf DESC
                LIMIT 5000
                """,
                (entity_ids,)
            )

            # Apply per_doc_limit so no single document dominates the pack.
            doc_counts: dict[str, int] = defaultdict(int)
            selected_chunks: list[tuple[str, int]] = []
            per_doc_limit = 4
            for m in mentions:
                if doc_counts[m["doc_id"]] < per_doc_limit:
                    selected_chunks.append((m["doc_id"], m["chunk_id"]))
                    doc_counts[m["doc_id"]] += 1
                    if len(selected_chunks) >= max_chunks:
                        break

            if not selected_chunks:
                return SummarizeCommunityOut(
                    comm_id=comm_id,
                    pack_id=0,
                    summary_json={},
                    markdown="",
                    error=MCPErrorModel(code="NOT_FOUND", message="No chunks found for community"),
                ).model_dump()

            # Create the evidence pack and its ranked items.
            with get_db() as conn:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO evidence_packs(query, params_json)
                        VALUES (%s, %s::jsonb)
                        RETURNING pack_id
                        """,
                        (
                            f"Community {comm_id} summary",
                            json.dumps({"comm_id": comm_id, "for": "summary"})
                        )
                    )
                    result = cur.fetchone()
                    actual_pack_id = result["pack_id"]
                    # rank starts at 0 and follows the selection order above.
                    for rank, (doc_id, chunk_id) in enumerate(selected_chunks):
                        cur.execute(
                            """
                            INSERT INTO evidence_pack_items(pack_id, doc_id, chunk_id, rank)
                            VALUES (%s, %s, %s, %s)
                            ON CONFLICT DO NOTHING
                            """,
                            (actual_pack_id, doc_id, chunk_id, rank)
                        )

        # Load the evidence pack contents (chunk text plus document metadata).
        evidence_items = query_all(
            """
            SELECT i.doc_id, i.chunk_id, c.page_start, c.page_end, c.text,
                   d.title, d.authors, d.year
            FROM evidence_pack_items i
            JOIN chunks c ON c.chunk_id = i.chunk_id
            JOIN documents d ON d.doc_id = i.doc_id
            WHERE i.pack_id = %s
            ORDER BY i.rank
            """,
            (actual_pack_id,)
        )
        if not evidence_items:
            return SummarizeCommunityOut(
                comm_id=comm_id,
                pack_id=actual_pack_id,
                summary_json={},
                markdown="",
                error=MCPErrorModel(code="NOT_FOUND", message="Evidence pack is empty"),
            ).model_dump()

        # Top entities give the LLM a sketch of what the community is about.
        top_entities = query_all(
            """
            SELECT e.canonical_name, e.type, cm.weight
            FROM community_members cm
            JOIN entities e ON e.entity_id = cm.entity_id
            WHERE cm.comm_id = %s
            ORDER BY cm.weight DESC
            LIMIT 20
            """,
            (comm_id,)
        )
        top_entities_str = ", ".join([
            f"{e['canonical_name']} ({e['type']})" for e in top_entities
        ])

        # Format the evidence for the prompt.
        evidence_text = format_evidence_for_prompt(evidence_items)

        # Call the LLM (async); returns None on any failure.
        summary_json = await acall_llm_summarize(top_entities_str, evidence_text, actual_llm_model)
        if not summary_json:
            return SummarizeCommunityOut(
                comm_id=comm_id,
                pack_id=actual_pack_id,
                summary_json={},
                markdown="",
                error=MCPErrorModel(code="LLM_ERROR", message="Failed to generate summary"),
            ).model_dump()

        # Render Markdown from the structured summary.
        markdown = summary_to_markdown(summary_json, comm_id)

        # Upsert the summary into the database (one row per community).
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    INSERT INTO community_summaries(comm_id, summary_json)
                    VALUES (%s, %s::jsonb)
                    ON CONFLICT (comm_id) DO UPDATE
                    SET summary_json = EXCLUDED.summary_json, updated_at = now()
                    """,
                    (comm_id, json.dumps(summary_json))
                )

        return SummarizeCommunityOut(
            comm_id=comm_id,
            pack_id=actual_pack_id,
            summary_json=summary_json,
            markdown=markdown,
        ).model_dump()
    except Exception as e:
        # Boundary handler: any unexpected failure is reported as LLM_ERROR,
        # even non-LLM failures such as DB errors — the labelling is coarse.
        import traceback
        traceback.print_exc()
        return SummarizeCommunityOut(
            comm_id=comm_id,
            pack_id=pack_id or 0,
            summary_json={},
            markdown="",
            error=MCPErrorModel(code="LLM_ERROR", message=str(e)),
        ).model_dump()
  • MCP tool registration decorator @mcp.tool() defining the entrypoint summarize_community_v1 which delegates to the core run function.
    @mcp.tool() async def summarize_community_v1( comm_id: int, pack_id: int | None = None, llm_model: str | None = None, max_chunks: int = 100, style: str = "econ_finance", ) -> dict[str, Any]: """生成社区结构化摘要""" return await summarize_community_v1_run( comm_id=comm_id, pack_id=pack_id, llm_model=llm_model, max_chunks=max_chunks, style=style )
  • Pydantic input (SummarizeCommunityIn) and output (SummarizeCommunityOut) models used for schema validation and type hints in the tool implementation.
class SummarizeCommunityIn(BaseModel):
    """Input schema for summarize_community_v1."""
    comm_id: int
    pack_id: Optional[int] = None
    # Defaults to the LLM_MODEL environment variable when left as None.
    llm_model: Optional[str] = None
    max_chunks: int = 100
    # Only one style is currently defined.
    style: Literal["econ_finance"] = "econ_finance"


class SummarizeCommunityOut(BaseModel):
    """Output schema for summarize_community_v1."""
    comm_id: int
    pack_id: int
    summary_json: dict[str, Any]
    markdown: str
    # Populated only on failure; payload fields are then empty.
    error: Optional[MCPErrorModel] = None
  • Invocation of the register_graph_summarize_tools function in the main MCP server setup, which registers the summarize_community_v1 tool.
# Register the graph-summarize tools (including summarize_community_v1)
# on the MCP server instance.
register_graph_summarize_tools(mcp)
  • Helper function to asynchronously call LLM (OpenRouter) with community evidence prompt to generate the structured JSON summary.
    async def acall_llm_summarize(top_entities: str, evidence_text: str, llm_model: str) -> dict | None: """调用 LLM 生成社区摘要 (Async)""" settings = get_settings() if not settings.openrouter_api_key: return None url = f"{settings.openrouter_base_url}/chat/completions" headers = { "Authorization": f"Bearer {settings.openrouter_api_key}", "Content-Type": "application/json", } payload = { "model": llm_model, "messages": [ {"role": "system", "content": COMMUNITY_SUMMARY_SYSTEM_PROMPT}, {"role": "user", "content": COMMUNITY_SUMMARY_USER_PROMPT_TEMPLATE.format( top_entities=top_entities, evidence_text=evidence_text, )}, ], "temperature": 0.2, "response_format": {"type": "json_object"}, # OpenRouter 格式:禁用推理模型的 reasoning tokens(如 GPT-5 Nano) # 使用 effort: low 最小化推理,exclude: true 不返回推理内容 "reasoning": { "effort": "low", "exclude": True, }, } try: async with httpx.AsyncClient(timeout=180.0) as client: response = await client.post(url, json=payload, headers=headers) response.raise_for_status() data = response.json() content = data["choices"][0]["message"]["content"] return json.loads(content) except Exception as e: print(f"LLM summarize error: {e}") return None

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/h-lu/paperlib-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server