list_documents

Instructions

列出所有已导入的文档

获取文献库中所有文档的摘要列表，支持排序和筛选。

Args:
- limit: 返回结果数量限制，默认 50
- offset: 分页偏移量，默认 0
- order_by: 排序字段，可选 "created_at"（默认）、"year"、"title"
- has_embeddings: 筛选条件，True=只显示有完整embedding的，False=只显示缺embedding的，None=显示全部

Returns: 文档列表，包含基本信息和 chunk/embedding 统计

Input Schema

Table / JSON Schema

Name	Required	Default
`limit`	No	50
`offset`	No	0
`order_by`	No	created_at
`has_embeddings`	No	None

Implementation Reference

src/paperlib_mcp/tools/fetch.py:245-361 (handler)
The handler function that implements the logic for listing documents with optional pagination (limit/offset), sorting (order_by), and filtering by embedding status (has_embeddings). It constructs dynamic SQL queries to fetch document metadata and chunk statistics from the database.
@mcp.tool()
def list_documents(
    limit: int = 50,
    offset: int = 0,
    order_by: str = "created_at",
    has_embeddings: bool | None = None,
) -> dict[str, Any]:
    """List imported documents with chunk and embedding statistics.

    Returns one summary row per document in the library, with optional
    sorting, pagination and filtering by embedding completeness.

    Args:
        limit: Maximum number of documents to return. Defaults to 50.
            Must be non-negative.
        offset: Number of documents to skip before the first returned row.
            Defaults to 0. Must be non-negative.
        order_by: Sort key, one of "created_at" (default, newest first),
            "year" (newest first, NULLs last) or "title" (A-Z, NULLs last).
            Any other value falls back to "created_at".
        has_embeddings: True returns only documents that have at least one
            chunk and whose chunks all have an embedding; False returns only
            documents with more chunks than embeddings; None returns all.

    Returns:
        On success, a dict with "total" (number of documents matching the
        filter, ignoring pagination), "limit", "offset", "order_by" (the
        sort key actually applied), "has_embeddings_filter" and "documents"
        (a list of per-document dicts with "doc_id", "title", "authors",
        "year", "created_at", "chunk_count", "embedded_count" and
        "fully_embedded").
        On failure, a dict with "error" (message), "total" 0 and an empty
        "documents" list; exceptions are not propagated to the caller.
    """
    try:
        # Reject bad paging values before touching the database, so the
        # caller gets a clear message instead of a raw driver error.
        if limit < 0 or offset < 0:
            return {
                "error": "limit and offset must be non-negative",
                "total": 0,
                "documents": [],
            }

        # Whitelist of sort keys -> ORDER BY fragments. Only these fixed
        # strings are ever interpolated into the SQL text; user input is
        # used solely as a dictionary key.
        order_clauses = {
            "created_at": "d.created_at DESC",
            "year": "d.year DESC NULLS LAST",
            "title": "d.title ASC NULLS LAST",
        }
        effective_order = order_by if order_by in order_clauses else "created_at"

        # Built once and shared by the page query and the count query, so
        # "total" always describes the same set that is being paginated.
        if has_embeddings is True:
            # Every chunk has an embedding (and there is at least one chunk).
            having_clause = (
                "HAVING COUNT(c.chunk_id) > 0"
                " AND COUNT(c.chunk_id) = COUNT(ce.chunk_id)"
            )
        elif has_embeddings is False:
            # At least one chunk is missing its embedding.
            having_clause = "HAVING COUNT(c.chunk_id) > COUNT(ce.chunk_id)"
        else:
            having_clause = ""

        grouped_source = """
            FROM documents d
            LEFT JOIN chunks c ON d.doc_id = c.doc_id
            LEFT JOIN chunk_embeddings ce ON c.chunk_id = ce.chunk_id
            GROUP BY d.doc_id
        """

        # d.doc_id is appended as a tiebreaker: without a unique final sort
        # key, rows with equal sort values may be returned in a different
        # order on each call, duplicating or skipping documents across pages.
        page_query = f"""
            SELECT
                d.doc_id,
                d.title,
                d.authors,
                d.year,
                d.created_at::text,
                COUNT(c.chunk_id) as chunk_count,
                COUNT(ce.chunk_id) as embedded_count
            {grouped_source}
            {having_clause}
            ORDER BY {order_clauses[effective_order]}, d.doc_id
            LIMIT %s OFFSET %s
        """
        docs = query_all(page_query, (limit, offset))

        # Total number of matching documents, independent of pagination.
        if having_clause:
            total_query = f"""
                SELECT COUNT(*) as count FROM (
                    SELECT d.doc_id
                    {grouped_source}
                    {having_clause}
                ) sub
            """
        else:
            # No filter: counting the documents table directly avoids the joins.
            total_query = "SELECT COUNT(*) as count FROM documents"
        total = query_one(total_query)

        return {
            "total": total["count"] if total else 0,
            "limit": limit,
            "offset": offset,
            "order_by": effective_order,
            "has_embeddings_filter": has_embeddings,
            "documents": [
                {
                    "doc_id": d["doc_id"],
                    "title": d["title"],
                    "authors": d["authors"],
                    "year": d["year"],
                    "created_at": d["created_at"],
                    "chunk_count": d["chunk_count"],
                    "embedded_count": d["embedded_count"],
                    "fully_embedded": (
                        d["chunk_count"] > 0
                        and d["chunk_count"] == d["embedded_count"]
                    ),
                }
                for d in docs
            ],
        }
    except Exception as e:
        # Tool boundary: report the failure in the response payload rather
        # than raising, keeping the same error shape callers already handle.
        return {
            "error": str(e),
            "total": 0,
            "documents": [],
        }
src/paperlib_mcp/server.py:36-36 (registration)
The line where register_fetch_tools is called on the MCP instance, which in turn registers the list_documents tool via its @mcp.tool() decorator.
register_fetch_tools(mcp)
src/paperlib_mcp/tools/fetch.py:52-52 (registration)
The registration function that defines and registers multiple fetch tools, including list_documents, using @mcp.tool() decorators.
def register_fetch_tools(mcp: FastMCP) -> None:

Paperlib MCP

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API