Skip to main content
Glama

lint_section_v1

Validate markdown citations in academic writing by checking compliance with citation rules, ensuring proper attribution and formatting for evidence-based content.

Instructions

验证章节引用合规

检查 Agent 写作的 markdown 是否符合引用规则。

Args:

  • pack_id: 证据包 ID
  • markdown: Agent 写作的 markdown 内容
  • require_citations_per_paragraph: 是否要求每段有引用,默认 False
  • min_citations_per_paragraph: 每段最少引用数,默认 1

Returns: passed, issues[], stats

Input Schema

TableJSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| pack_id | Yes | | |
| markdown | Yes | | |
| require_citations_per_paragraph | No | | |
| min_citations_per_paragraph | No | | |

Implementation Reference

  • The main execution logic for the 'lint_section_v1' tool. This function validates the citations in the provided markdown against the chunks in the specified evidence pack (pack_id). It checks for valid chunk existence, pack membership, paragraph citation density (optional), and single-source dominance.
    @mcp.tool()
    def lint_section_v1(
        pack_id: int,
        markdown: str,
        require_citations_per_paragraph: bool = False,
        min_citations_per_paragraph: int = 1,
    ) -> dict[str, Any]:
        """Validate the citations of a written section against an evidence pack.

        Checks whether the markdown written by the agent follows the citation
        rules. Citations use the form ``[[chunk:<chunk_id>]]``.

        Rules applied:
            * CHUNK_NOT_FOUND (error): the cited chunk is not in ``chunks``.
            * CHUNK_OUT_OF_PACK (error): the chunk exists but is not part of
              the evidence pack.
            * LOW_PARAGRAPH_DENSITY (warning, optional): a non-heading
              paragraph has fewer citations than required.
            * SINGLE_SOURCE_DOMINANT (warning): one document accounts for more
              than half of all citations.

        Args:
            pack_id: Evidence pack ID.
            markdown: Markdown content written by the agent.
            require_citations_per_paragraph: Whether every paragraph must
                carry citations. Defaults to False.
            min_citations_per_paragraph: Minimum number of citations per
                paragraph. Defaults to 1.

        Returns:
            On success, a dict with ``passed`` (False if any issue has
            severity "error"), ``issues`` (list of issue dicts) and ``stats``.
            On failure (empty/unknown pack, or any exception), a dict with
            ``error`` and ``passed`` set to False.
        """
        try:
            # Collect every chunk_id that belongs to the pack.
            pack_chunks = query_all(
                "SELECT chunk_id FROM evidence_pack_items WHERE pack_id = %s",
                (pack_id,),
            )
            if not pack_chunks:
                # Include "passed" so this failure has the same shape as the
                # exception path below.
                return {
                    "error": f"Pack not found or empty: {pack_id}",
                    "passed": False,
                }

            valid_chunk_ids = {row["chunk_id"] for row in pack_chunks}

            # Parse citations out of the markdown.
            # Format: [[chunk:<chunk_id>]]
            citation_re = re.compile(r"\[\[chunk:(\d+)\]\]")
            cited_chunk_ids = [int(c) for c in citation_re.findall(markdown)]

            # Look every distinct chunk up exactly once, fetching doc_id in
            # the same query, instead of querying per occurrence and then
            # again for the source document. dict.fromkeys keeps first-seen
            # order so queries are issued deterministically.
            chunk_rows = {
                chunk_id: query_one(
                    "SELECT chunk_id, doc_id FROM chunks WHERE chunk_id = %s",
                    (chunk_id,),
                )
                for chunk_id in dict.fromkeys(cited_chunk_ids)
            }

            issues = []
            valid_citations = 0
            invalid_citations = 0

            # Check each citation occurrence (one issue per occurrence).
            for chunk_id in cited_chunk_ids:
                # Does the chunk exist at all?
                if not chunk_rows[chunk_id]:
                    issues.append({
                        "severity": "error",
                        "rule": "CHUNK_NOT_FOUND",
                        "chunk_id": chunk_id,
                        "message": f"Chunk {chunk_id} does not exist in database",
                        "suggestion": "Remove this citation or use a valid chunk_id from the evidence pack",
                    })
                    invalid_citations += 1
                    continue

                # Is the chunk part of this pack?
                if chunk_id not in valid_chunk_ids:
                    issues.append({
                        "severity": "error",
                        "rule": "CHUNK_OUT_OF_PACK",
                        "chunk_id": chunk_id,
                        "message": f"Chunk {chunk_id} is not in evidence pack {pack_id}",
                        "suggestion": "Only cite chunks from the provided evidence pack",
                    })
                    invalid_citations += 1
                    continue

                valid_citations += 1

            # Optional: check citation density per paragraph.
            if require_citations_per_paragraph:
                # Split on blank lines; the separator tolerates CRLF line
                # endings and "blank" lines that contain only whitespace.
                paragraphs = [
                    p.strip() for p in re.split(r"\n\s*\n", markdown) if p.strip()
                ]
                for i, para in enumerate(paragraphs):
                    # Skip headings.
                    if para.startswith("#"):
                        continue
                    # Count the citations inside this paragraph.
                    found = len(citation_re.findall(para))
                    if found < min_citations_per_paragraph:
                        issues.append({
                            "severity": "warning",
                            "rule": "LOW_PARAGRAPH_DENSITY",
                            "paragraph_index": i,
                            "message": f"Paragraph {i+1} has {found} citations (minimum: {min_citations_per_paragraph})",
                            "suggestion": f"Add at least {min_citations_per_paragraph - found} more citation(s) to this paragraph",
                        })

            # Check whether a single source document dominates.
            if cited_chunk_ids:
                # Count citation occurrences per source document.
                doc_cite_counts: dict[Any, int] = {}
                for chunk_id in cited_chunk_ids:
                    row = chunk_rows[chunk_id]
                    doc_id = row["doc_id"] if row else None
                    # Only a missing doc_id is skipped; a falsy-but-real id
                    # (e.g. 0) still counts.
                    if doc_id is not None:
                        doc_cite_counts[doc_id] = doc_cite_counts.get(doc_id, 0) + 1

                total = len(cited_chunk_ids)
                for doc_id, count in doc_cite_counts.items():
                    # Integer form of count / total > 0.5.
                    if count * 2 > total:
                        issues.append({
                            "severity": "warning",
                            "rule": "SINGLE_SOURCE_DOMINANT",
                            "doc_id": doc_id,
                            # str() keeps the slice safe for non-string ids.
                            "message": f"Document {str(doc_id)[:16]}... accounts for {count}/{total} ({count*100//total}%) of citations",
                            "suggestion": "Consider diversifying citations across multiple sources",
                        })

            # The section passes when no issue has severity "error".
            has_errors = any(issue["severity"] == "error" for issue in issues)

            # Coverage is the share of the pack's chunks that are cited at
            # least once, so repeated citations cannot push it above 1.0.
            covered_chunk_ids = {
                chunk_id
                for chunk_id, row in chunk_rows.items()
                if row and chunk_id in valid_chunk_ids
            }

            return {
                "passed": not has_errors,
                "issues": issues,
                "stats": {
                    "total_citations": len(cited_chunk_ids),
                    "valid_citations": valid_citations,
                    "invalid_citations": invalid_citations,
                    "unique_chunks_cited": len(chunk_rows),
                    "pack_chunk_count": len(valid_chunk_ids),
                    "citation_coverage": len(covered_chunk_ids) / len(valid_chunk_ids) if valid_chunk_ids else 0,
                },
            }

        except Exception as e:
            # Tool boundary: report the failure to the caller as data rather
            # than letting it propagate.
            return {"error": str(e), "passed": False}
  • The call to register_review_tools(mcp) registers all review tools, including 'lint_section_v1', to the FastMCP server instance.
    register_review_tools(mcp)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/h-lu/paperlib-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server