query_papers
Search and filter arXiv research papers by date, category, title, or specific IDs to retrieve selected metadata fields from a local database.
Instructions
Query papers from the local database with flexible filtering and field selection.
All filter parameters are combined with AND logic. Within categories, OR logic is used.
If no filter parameters are provided, returns the most recent papers up to max_results.
Args:
date: Filter by publication date in YYYY-MM-DD format (e.g. "2026-03-18")
categories: Filter by one or more arXiv categories (OR logic), e.g. ["cs.AI", "cs.LG"]
title: Filter by title keyword (title field only, not abstract; case-insensitive for ASCII)
entry_ids: Fetch specific papers by their arXiv entry IDs. Typically used alone;
combining with other filters applies AND logic and may return fewer results
than expected if the other conditions do not match.
fields: Fields to return. Valid: entry_id, title, authors, abstract, url, published, updated, categories.
Defaults to: entry_id, title, authors, published, url
max_results: Maximum number of results to return (default: 500)
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| date | No | ||
| categories | No | ||
| title | No | ||
| entry_ids | No | ||
| fields | No | ||
| max_results | No | | 500 |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| result | Yes | ||
Implementation Reference
- src/arxiv_mcp_server/server.py:106-166 (handler) The MCP tool handler for "query_papers", which defines the tool's interface and performs input validation.
def query_papers(
    date: Optional[str] = None,
    categories: Optional[List[str]] = None,
    title: Optional[str] = None,
    entry_ids: Optional[List[str]] = None,
    fields: Optional[List[str]] = None,
    max_results: int = 500,
) -> str:
    """Query papers from the local database with flexible filtering and field selection.

    All filter parameters are combined with AND logic. Within categories, OR logic is used.
    If no filter parameters are provided, returns the most recent papers up to max_results.

    Args:
        date: Filter by publication date in YYYY-MM-DD format (e.g. "2026-03-18")
        categories: Filter by one or more arXiv categories (OR logic), e.g. ["cs.AI", "cs.LG"]
        title: Filter by title keyword (title field only, not abstract; case-insensitive for ASCII)
        entry_ids: Fetch specific papers by their arXiv entry IDs. Typically used alone;
            combining with other filters applies AND logic and may return fewer results
            than expected if the other conditions do not match.
        fields: Fields to return. Valid: entry_id, title, authors, abstract, url, published,
            updated, categories.
            Defaults to: entry_id, title, authors, published, url
        max_results: Maximum number of results to return (default: 500)

    Returns:
        A JSON string. On success it holds "total" (number of papers returned)
        and "papers" (one dict per paper, restricted to the requested fields).
        On invalid input it holds a single "error" message instead.
    """
    from datetime import datetime

    def _error(message: str) -> str:
        # Single place for error payloads so every response is formatted alike.
        return json.dumps({"error": message}, ensure_ascii=False, indent=2)

    active_fields = fields if fields is not None else DEFAULT_FIELDS
    invalid = [f for f in active_fields if f not in VALID_FIELDS]
    if invalid:
        return _error(
            f"Invalid field(s): {invalid}. Valid fields: {sorted(VALID_FIELDS)}"
        )

    # A negative value would reach SQLite's LIMIT clause, where it means
    # "no limit" -- the opposite of what a maximum is meant to express.
    if not isinstance(max_results, int) or max_results < 0:
        return _error(
            f"Invalid max_results: {max_results!r}. Expected a non-negative integer."
        )

    if date:
        # fullmatch + re.ASCII: reject a trailing newline and non-ASCII digits,
        # both of which would otherwise pass and silently match no rows.
        if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", date, flags=re.ASCII):
            return _error(f"Invalid date format: {date!r}. Expected YYYY-MM-DD.")
        try:
            # The pattern only checks the shape; this rejects impossible
            # dates such as "2026-13-45" or "2026-02-30".
            datetime.strptime(date, "%Y-%m-%d")
        except ValueError:
            return _error(f"Invalid date: {date!r}. Not a valid calendar date.")

    logger.info(
        "Querying papers: date=%r, categories=%s, title=%r, entry_ids=%s, "
        "fields=%s, max=%s",
        date,
        categories,
        title,
        entry_ids,
        active_fields,
        max_results,
    )

    db = _get_db()
    papers = db.query_papers(
        date=date,
        categories=categories,
        title=title,
        entry_ids=entry_ids,
        max_results=max_results,
    )

    return json.dumps(
        {
            "total": len(papers),
            "papers": [_build_paper_dict(p, active_fields) for p in papers],
        },
        ensure_ascii=False,
        indent=2,
    )
# - src/arxiv_mcp_server/db.py:82-126 (handler) The underlying database logic that performs the SQL query for papers.
def query_papers(
    self,
    date: Optional[str] = None,
    categories: Optional[List[str]] = None,
    title: Optional[str] = None,
    entry_ids: Optional[List[str]] = None,
    max_results: int = 500,
) -> List["ArxivPaper"]:
    """Select papers from the ``papers`` table, newest ``published`` first.

    Every supplied filter becomes one SQL condition; conditions are joined
    with AND. Filters that are ``None`` or empty are skipped, so calling with
    no filters returns the newest rows up to ``max_results``.

    Args:
        date: Matched against ``DATE(published)``.
        categories: A row matches if its ``categories`` column contains any of
            these values wrapped in double quotes (OR logic). Presumably the
            column stores a JSON-encoded list -- confirm against the writer.
        title: Substring matched against ``title`` with SQL ``LIKE``.
        entry_ids: Restrict results to rows whose ``entry_id`` is in this list.
        max_results: Value bound to the ``LIMIT`` clause.

    Returns:
        One ``convert_to_paper(row)`` result per selected row.
    """
    from contextlib import closing

    conditions: List[str] = []
    params: List[Any] = []

    if date:
        conditions.append("DATE(published) = ?")
        params.append(date)

    if categories:
        # One LIKE clause per category, OR-ed together. User text is passed
        # through self._escape_like and bound as a parameter, never
        # interpolated into the SQL string.
        cat_clauses = ["categories LIKE ? ESCAPE '\\'" for _ in categories]
        conditions.append("(" + " OR ".join(cat_clauses) + ")")
        params.extend(f'%"{self._escape_like(c)}"%' for c in categories)

    if title:
        conditions.append("title LIKE ? ESCAPE '\\'")
        params.append(f"%{self._escape_like(title)}%")

    if entry_ids:
        placeholders = ",".join("?" for _ in entry_ids)
        conditions.append(f"entry_id IN ({placeholders})")
        params.extend(entry_ids)

    where = ("WHERE " + " AND ".join(conditions)) if conditions else ""
    sql = f"""
        SELECT entry_id, updated, published, title, summary, authors, categories, viewed
        FROM papers
        {where}
        ORDER BY published DESC
        LIMIT ?
    """
    params.append(max_results)

    # sqlite3's own context manager only commits or rolls back; it does not
    # close the connection. closing() releases the handle on every call.
    with closing(sqlite3.connect(self.database_path)) as conn:
        rows = conn.execute(sql, params).fetchall()
    return [self.convert_to_paper(row) for row in rows]