draft_lit_review_v1
Generate structured literature review drafts by organizing academic sources into standard formats. Use existing evidence packages or retrieve relevant papers to create comprehensive review outlines with proper citations.
Instructions
生成文献综述草稿
基于指定主题或已有证据包,按照学术标准结构组织成综述草稿。
Args: topic: 综述主题/研究问题(如果提供 pack_id 则可选) pack_id: 已有证据包 ID(如果提供则直接使用,不重新检索) k: 检索的相关 chunk 数量(仅当未提供 pack_id 时使用),默认 30 outline_style: 大纲样式,可选 "econ_finance_canonical"(经济金融)或 "general"(通用)
Returns: 综述草稿,包含: - sections: 按结构组织的章节列表 - all_citations: 所有引用的文献信息 - total_sources: 引用的文献总数
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| topic | No | Review topic / research question (optional when pack_id is provided) | |
| pack_id | No | Existing evidence pack ID (used directly, without a new retrieval) | |
| k | No | Number of relevant chunks to retrieve (only used when pack_id is absent) | 30 |
| outline_style | No | Outline style: "econ_finance_canonical" or "general" | econ_finance_canonical |
Implementation Reference
@mcp.tool()
async def draft_lit_review_v1(
    topic: str | None = None,
    pack_id: int | None = None,
    k: int = 30,
    outline_style: str = "econ_finance_canonical",
) -> dict[str, Any]:
    """Generate a structured literature-review draft.

    Organizes evidence for a topic (or an existing evidence pack) into a
    review draft following a standard academic outline.

    Args:
        topic: Review topic / research question (optional when pack_id is given).
        pack_id: Existing evidence-pack ID; when given, it is used directly and
            no new retrieval runs.
        k: Number of relevant chunks to retrieve (only when pack_id is absent).
        outline_style: Outline template, "econ_finance_canonical" (economics /
            finance) or "general"; unknown values fall back to "general".

    Returns:
        Draft dict containing:
        - sections: sections organized by the outline structure
        - all_citations: deduplicated cited-document info
        - total_sources: total number of evidence items used
        On failure, a dict with an "error" key instead.
    """
    try:
        # 1. Decide the evidence source: existing pack vs. fresh retrieval.
        evidence_items: list[dict[str, Any]] = []
        used_pack_id: int | None = None
        actual_topic = topic

        # Compare against None explicitly so a legitimate pack_id of 0 is
        # honored (a bare truthiness test would wrongly ignore it).
        if pack_id is not None:
            pack = get_evidence_pack(pack_id)
            if not pack:
                return {
                    "error": f"Evidence pack not found: {pack_id}",
                    "pack_id": pack_id,
                }
            used_pack_id = pack_id
            actual_topic = topic or pack.query
            # Normalize pack items into the common evidence-item shape.
            for item in pack.items:
                evidence_items.append({
                    "doc_id": item.doc_id,
                    "chunk_id": item.chunk_id,
                    "page_start": item.page_start,
                    "page_end": item.page_end,
                    "text": item.text,
                    "score": item.score,
                })
        else:
            if not topic:
                return {
                    "error": "Must provide either topic or pack_id",
                }
            # Run a fresh hybrid search for the topic.
            search_result = await hybrid_search(topic, k=k, alpha=0.6, per_doc_limit=3)
            if not search_result.results:
                return {
                    "error": "No relevant literature found for the topic",
                    "topic": topic,
                    "sections": [],
                    "all_citations": [],
                }
            for result in search_result.results:
                # Fetch the full chunk text; fall back to the search snippet.
                chunk = query_one(
                    "SELECT text FROM chunks WHERE chunk_id = %s",
                    (result.chunk_id,)
                )
                evidence_items.append({
                    "doc_id": result.doc_id,
                    "chunk_id": result.chunk_id,
                    "page_start": result.page_start,
                    "page_end": result.page_end,
                    "text": chunk["text"] if chunk else result.snippet,
                    "score": result.score_total,
                })

        # 2. Pick the outline template (unknown styles fall back to "general").
        template = OUTLINE_TEMPLATES.get(outline_style, OUTLINE_TEMPLATES["general"])

        # 3. Load document metadata for every distinct cited document.
        doc_ids = list(set(item["doc_id"] for item in evidence_items))
        doc_metadata: dict[str, dict] = {}
        for doc_id in doc_ids:
            doc = query_one(
                "SELECT doc_id, title, authors, year FROM documents WHERE doc_id = %s",
                (doc_id,)
            )
            if doc:
                doc_metadata[doc_id] = {
                    "doc_id": doc["doc_id"],
                    "title": doc["title"] or "Untitled",
                    "authors": doc["authors"] or "Unknown",
                    "year": doc["year"],
                }

        # 4. Assign each evidence item to the section whose keywords score the
        #    most substring hits in the chunk text.
        section_evidence: dict[str, list] = {s["id"]: [] for s in template["sections"]}
        for item in evidence_items:
            text_lower = item["text"].lower()
            best_section = None
            best_score = 0
            for section in template["sections"]:
                keywords = section.get("keywords", [])
                match_count = sum(1 for kw in keywords if kw.lower() in text_lower)
                if match_count > best_score:
                    best_score = match_count
                    best_section = section["id"]
            # With no keyword hit at all, default to the first section.
            if not best_section:
                best_section = template["sections"][0]["id"]
            section_evidence[best_section].append(item)

        # 5. Render each section: top-scored snippets with citation markers.
        sections = []
        all_citations = []
        for section_template in template["sections"]:
            section_id = section_template["id"]
            section_items = section_evidence.get(section_id, [])
            # Highest-scoring evidence first.
            section_items.sort(key=lambda x: x["score"], reverse=True)

            content_parts = []
            section_citations = []
            content_parts.append(f"**{section_template['description']}**\n")

            for item in section_items[:10]:  # cap at 10 items per section
                doc_id = item["doc_id"]
                meta = doc_metadata.get(doc_id, {"title": "Unknown", "authors": "Unknown", "year": None})
                # Full citation record for this evidence item.
                citation = {
                    "doc_id": doc_id,
                    "title": meta["title"],
                    "authors": meta["authors"],
                    "year": meta["year"],
                    "page_start": item["page_start"],
                    "page_end": item["page_end"],
                    "chunk_id": item["chunk_id"],
                }
                section_citations.append(citation)
                # Inline citation marker, e.g. "[Smith, 2020: p.3-5]".
                year_str = str(meta["year"]) if meta["year"] else "n.d."
                cite_key = f"[{meta['authors']}, {year_str}: p.{item['page_start']}-{item['page_end']}]"
                # Truncate long chunk text to a ~300-char snippet.
                text = item["text"]
                snippet = text[:300] + "..." if len(text) > 300 else text
                content_parts.append(f"- {snippet} {cite_key}")

            if not section_items:
                content_parts.append("(暂无相关内容)")

            sections.append(LitReviewSection(
                section_id=section_id,
                title=section_template["title"],
                content="\n\n".join(content_parts),
                citations=section_citations,
            ))
            all_citations.extend(section_citations)

        # 6. Deduplicate citations at the document level for the summary list.
        unique_citations = []
        seen_docs = set()
        for cite in all_citations:
            if cite["doc_id"] not in seen_docs:
                seen_docs.add(cite["doc_id"])
                unique_citations.append({
                    "doc_id": cite["doc_id"],
                    "title": cite["title"],
                    "authors": cite["authors"],
                    "year": cite["year"],
                })

        return LitReviewDraft(
            topic=actual_topic,
            outline_style=outline_style,
            pack_id=used_pack_id,
            total_sources=len(evidence_items),
            unique_documents=len(unique_citations),
            sections=sections,
            all_citations=unique_citations,
        ).model_dump()

    except Exception as e:
        # Tool boundary: surface any failure as a structured error payload
        # instead of raising through the MCP layer.
        return {
            "error": str(e),
            "topic": topic,
            "sections": [],
            "all_citations": [],
        }
class LitReviewSection(BaseModel):
    """One section of the literature-review draft."""

    section_id: str                   # outline section identifier (template "id")
    title: str                        # section heading from the outline template
    content: str                      # rendered markdown body with inline citations
    citations: list[dict[str, Any]]   # per-evidence citation records for this section


class LitReviewDraft(BaseModel):
    """Complete structured literature-review draft returned by the tool."""

    topic: str                            # review topic / research question
    outline_style: str                    # outline template name used
    pack_id: int | None                   # source evidence pack, if one was used
    total_sources: int                    # number of evidence items consumed
    unique_documents: int                 # distinct documents cited
    sections: list[LitReviewSection]      # ordered sections per the outline
    all_citations: list[dict[str, Any]]   # document-level deduplicated citations
- src/paperlib_mcp/server.py:37-37 (registration): Explicit registration of the writing tools (including draft_lit_review_v1) on the main MCP server instance: `register_writing_tools(mcp)`
# Outline templates used by draft_lit_review_v1 to structure the review into
# predefined sections; each section's "keywords" drive evidence allocation by
# substring matching against chunk text.
# NOTE: the original literal was missing the outer closing brace; fixed here.
OUTLINE_TEMPLATES = {
    "econ_finance_canonical": {
        "name": "经济金融学经典结构",
        "sections": [
            {
                "id": "research_question",
                "title": "研究问题与理论框架",
                "description": "核心研究问题、理论基础和主要假设",
                "keywords": ["theory", "hypothesis", "framework", "model", "prediction"],
            },
            {
                "id": "methodology",
                "title": "方法与识别策略",
                "description": "实证方法、因果识别、计量模型",
                "keywords": ["method", "identification", "strategy", "estimation", "regression", "instrumental", "difference-in-differences", "RDD"],
            },
            {
                "id": "data",
                "title": "数据与变量度量",
                "description": "数据来源、样本选择、关键变量定义",
                "keywords": ["data", "sample", "variable", "measure", "proxy", "definition"],
            },
            {
                "id": "findings",
                "title": "主要发现",
                "description": "核心结论、稳健性检验、异质性分析",
                "keywords": ["result", "finding", "evidence", "show", "demonstrate", "coefficient", "significant"],
            },
            {
                "id": "debates",
                "title": "争议与不一致发现",
                "description": "文献中的分歧、methodological debates",
                "keywords": ["debate", "controversy", "inconsistent", "contrast", "however", "limitation"],
            },
            {
                "id": "gaps",
                "title": "研究空白与未来方向",
                "description": "尚未解决的问题、潜在研究机会",
                "keywords": ["gap", "future", "direction", "unexplored", "opportunity", "need"],
            },
        ],
    },
    "general": {
        "name": "通用文献综述结构",
        "sections": [
            {
                "id": "background",
                "title": "背景与动机",
                "description": "研究领域概述和重要性",
                "keywords": ["background", "motivation", "importance", "context"],
            },
            {
                "id": "theory",
                "title": "理论基础",
                "description": "相关理论和概念框架",
                "keywords": ["theory", "framework", "concept", "model"],
            },
            {
                "id": "methods",
                "title": "研究方法",
                "description": "主要研究方法和技术路线",
                "keywords": ["method", "approach", "technique", "design"],
            },
            {
                "id": "findings",
                "title": "主要发现",
                "description": "关键研究结论和证据",
                "keywords": ["result", "finding", "evidence", "conclusion"],
            },
            {
                "id": "future",
                "title": "未来研究方向",
                "description": "研究空白和潜在机会",
                "keywords": ["future", "direction", "gap", "opportunity"],
            },
        ],
    },
}
def get_evidence_pack(pack_id: int) -> EvidencePack | None:
    """Load a stored evidence pack with its ranked items.

    Args:
        pack_id: Evidence-pack ID.

    Returns:
        The assembled pack, or None when no pack with that ID exists.
    """
    # Pack header (metadata row).
    header = query_one(
        """
        SELECT pack_id, query, params_json, created_at::text
        FROM evidence_packs
        WHERE pack_id = %s
        """,
        (pack_id,)
    )
    if header is None:
        return None

    # Pack items joined with their chunk text, in rank order.
    rows = query_all(
        """
        SELECT epi.doc_id, epi.chunk_id, epi.rank,
               c.page_start, c.page_end, c.text
        FROM evidence_pack_items epi
        JOIN chunks c ON epi.chunk_id = c.chunk_id
        WHERE epi.pack_id = %s
        ORDER BY epi.rank
        """,
        (pack_id,)
    )

    def _rank_score(rank):
        # Rank-derived pseudo score (1, 1/2, 1/3, ...); 0.5 when rank is unknown.
        return 0.5 if rank is None else 1.0 / (rank + 1)

    pack_items = [
        EvidencePackItem(
            doc_id=row["doc_id"],
            chunk_id=row["chunk_id"],
            page_start=row["page_start"],
            page_end=row["page_end"],
            text=row["text"],
            score=_rank_score(row["rank"]),
        )
        for row in rows
    ]

    distinct_docs = {row["doc_id"] for row in rows}

    return EvidencePack(
        pack_id=header["pack_id"],
        query=header["query"],
        params=header["params_json"] or {},
        items=pack_items,
        stats={
            "total_chunks": len(rows),
            "unique_docs": len(distinct_docs),
        },
    )