list_documents
Retrieve and filter imported academic documents from your literature library with sorting options and embedding status filters.
Instructions
列出所有已导入的文档
获取文献库中所有文档的摘要列表,支持排序和筛选。
Args:
- limit: 返回结果数量限制,默认 50
- offset: 分页偏移量,默认 0
- order_by: 排序字段,可选 "created_at"(默认)、"year"、"title"
- has_embeddings: 筛选条件,True=只显示有完整embedding的,False=只显示缺embedding的,None=显示全部
Returns: 文档列表,包含基本信息和 chunk/embedding 统计
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
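| limit | No | Maximum number of results to return | 50 |
| offset | No | Pagination offset | 0 |
| order_by | No | Sort field: "created_at", "year", or "title" | created_at |
| has_embeddings | No | Embedding filter: True = only fully embedded documents, False = only documents missing embeddings, None = all documents | None |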
Implementation Reference
- src/paperlib_mcp/tools/fetch.py:245-361 (handler) The handler function that implements the logic for listing documents with optional pagination (limit/offset), sorting (order_by), and filtering by embedding status (has_embeddings). It constructs dynamic SQL queries to fetch document metadata and chunk statistics from the database.

```python
@mcp.tool()
def list_documents(
    limit: int = 50,
    offset: int = 0,
    order_by: str = "created_at",
    has_embeddings: bool | None = None,
) -> dict[str, Any]:
    """列出所有已导入的文档

    获取文献库中所有文档的摘要列表,支持排序和筛选。

    Args:
        limit: 返回结果数量限制,默认 50
        offset: 分页偏移量,默认 0
        order_by: 排序字段,可选 "created_at"(默认)、"year"、"title"
        has_embeddings: 筛选条件,True=只显示有完整embedding的,False=只显示缺embedding的,None=显示全部

    Returns:
        文档列表,包含基本信息和 chunk/embedding 统计
    """
    try:
        # 验证 order_by 参数
        valid_order_by = {"created_at": "d.created_at", "year": "d.year", "title": "d.title"}
        order_column = valid_order_by.get(order_by, "d.created_at")

        # 构建基础查询
        base_query = """
            SELECT
                d.doc_id,
                d.title,
                d.authors,
                d.year,
                d.created_at::text,
                COUNT(c.chunk_id) as chunk_count,
                COUNT(ce.chunk_id) as embedded_count
            FROM documents d
            LEFT JOIN chunks c ON d.doc_id = c.doc_id
            LEFT JOIN chunk_embeddings ce ON c.chunk_id = ce.chunk_id
            GROUP BY d.doc_id
        """

        # 添加筛选条件
        if has_embeddings is True:
            # 只显示所有 chunk 都有 embedding 的文档
            base_query += " HAVING COUNT(c.chunk_id) > 0 AND COUNT(c.chunk_id) = COUNT(ce.chunk_id)"
        elif has_embeddings is False:
            # 只显示缺少 embedding 的文档
            base_query += " HAVING COUNT(c.chunk_id) > COUNT(ce.chunk_id)"

        # 添加排序(处理 NULL 值)
        if order_by == "year":
            base_query += f" ORDER BY {order_column} DESC NULLS LAST"
        elif order_by == "title":
            base_query += f" ORDER BY {order_column} ASC NULLS LAST"
        else:
            base_query += f" ORDER BY {order_column} DESC"

        # 添加分页
        base_query += " LIMIT %s OFFSET %s"

        docs = query_all(base_query, (limit, offset))

        # 获取总数(考虑筛选条件)
        if has_embeddings is True:
            total_query = """
                SELECT COUNT(*) as count FROM (
                    SELECT d.doc_id
                    FROM documents d
                    LEFT JOIN chunks c ON d.doc_id = c.doc_id
                    LEFT JOIN chunk_embeddings ce ON c.chunk_id = ce.chunk_id
                    GROUP BY d.doc_id
                    HAVING COUNT(c.chunk_id) > 0 AND COUNT(c.chunk_id) = COUNT(ce.chunk_id)
                ) sub
            """
        elif has_embeddings is False:
            total_query = """
                SELECT COUNT(*) as count FROM (
                    SELECT d.doc_id
                    FROM documents d
                    LEFT JOIN chunks c ON d.doc_id = c.doc_id
                    LEFT JOIN chunk_embeddings ce ON c.chunk_id = ce.chunk_id
                    GROUP BY d.doc_id
                    HAVING COUNT(c.chunk_id) > COUNT(ce.chunk_id)
                ) sub
            """
        else:
            total_query = "SELECT COUNT(*) as count FROM documents"

        total = query_one(total_query)

        return {
            "total": total["count"] if total else 0,
            "limit": limit,
            "offset": offset,
            "order_by": order_by,
            "has_embeddings_filter": has_embeddings,
            "documents": [
                {
                    "doc_id": d["doc_id"],
                    "title": d["title"],
                    "authors": d["authors"],
                    "year": d["year"],
                    "created_at": d["created_at"],
                    "chunk_count": d["chunk_count"],
                    "embedded_count": d["embedded_count"],
                    "fully_embedded": d["chunk_count"] > 0 and d["chunk_count"] == d["embedded_count"],
                }
                for d in docs
            ],
        }
    except Exception as e:
        return {
            "error": str(e),
            "total": 0,
            "documents": [],
        }
```
- src/paperlib_mcp/server.py:36-36 (registration) The line where `register_fetch_tools` is called on the MCP instance, which in turn registers the `list_documents` tool via its `@mcp.tool()` decorator: `register_fetch_tools(mcp)`
- src/paperlib_mcp/tools/fetch.py:52-52 (registration) The registration function that defines and registers multiple fetch tools, including `list_documents`, using `@mcp.tool()` decorators: `def register_fetch_tools(mcp: FastMCP) -> None:`