arxiv_search
Search arXiv papers with plain keywords or field-specific queries (title, author, abstract, category). Filter by recency, sort by relevance or submission date, and choose output format.
Instructions
Search arXiv. Plain keywords work (auto-prefixed `all:`); for advanced queries use arXiv field syntax: `ti:` (title), `au:` (author), `abs:` (abstract), `cat:` (category, e.g. `cat:eess.IV`). `days` cuts off results older than N days (based on the `published` field). When `days` is set, results are sorted by submission date instead of relevance.
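A few illustrative argument combinations, sketched as direct Python calls (in practice the tool is invoked through an MCP client; the import path is an assumption for local experimentation):

```python
import asyncio

from trends_mcp import arxiv_search  # assumed import path

async def main() -> None:
    # Plain keywords: auto-prefixed, sent to the API as all:diffusion models
    print(await arxiv_search(query="diffusion models", max_results=5))

    # Field syntax passes through unchanged; days=30 drops older papers
    # and forces submittedDate sorting
    print(await arxiv_search(query="abs:segmentation AND cat:eess.IV", days=30))

    # Explicit sort by last update; "json" assumes a JSON member on ResponseFormat,
    # implied by the handler's "markdown or JSON" output formats
    print(await arxiv_search(query="ti:attention", sort_by="lastUpdatedDate",
                             response_format="json"))

asyncio.run(main())
```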
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Search query: plain keywords (auto-prefixed with `all:`) or arXiv field syntax (`ti:`, `au:`, `abs:`, `cat:`); 1-500 characters. | |
| days | No | If set, drop results whose `published` date is older than N days (client-side filter); 1-3650. | |
| max_results | No | Maximum number of results to return; 1-50. | 20 |
| sort_by | No | One of `relevance`, `submittedDate`, `lastUpdatedDate`. Overridden to `submittedDate` when `days` is set. | relevance |
| response_format | No | Output format: `markdown` or `json`. | markdown |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| result | Yes | Formatted result string: markdown (default) or JSON, per `response_format`. | |
Implementation Reference
- trends_mcp.py:290-296 (schema): ArxivSearchInput Pydantic model, the input schema for the arxiv_search tool. Defines the query (required), days, max_results, sort_by, and response_format fields.
```python
class ArxivSearchInput(BaseModel):
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    query: str = Field(..., min_length=1, max_length=500)
    days: int | None = Field(
        None,
        ge=1,
        le=3650,
        description="If set, drop results older than N days (client-side filter).",
    )
    max_results: int = Field(20, ge=1, le=50)
    sort_by: str = Field("relevance", pattern=r"^(relevance|submittedDate|lastUpdatedDate)$")
    response_format: ResponseFormat = ResponseFormat.MARKDOWN
```
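Because the handler constructs this model before making any network call, out-of-range arguments fail fast. A minimal sketch of that behavior (pydantic v2, as the `ConfigDict` usage implies):

```python
from pydantic import ValidationError

# Within bounds; str_strip_whitespace trims the query for you
args = ArxivSearchInput(query="  cat:cs.LG  ", days=7)
assert args.query == "cat:cs.LG"

# Out of bounds: days must be 1-3650 and max_results 1-50
try:
    ArxivSearchInput(query="cat:cs.LG", days=0, max_results=100)
except ValidationError as exc:
    print(exc.error_count())  # 2
```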
- trends_mcp.py:410-477 (registration): tool registration via the @_maybe_tool decorator with source="arxiv" and name="arxiv_search". The same span contains the handler itself; its body is reproduced once in the handler listing below.

```python
@_maybe_tool(
    source="arxiv",
    name="arxiv_search",
    description=(
        "Search arXiv. Plain keywords work (auto-prefixed `all:`); for advanced "
        "queries use arXiv field syntax: `ti:` (title), `au:` (author), "
        "`abs:` (abstract), `cat:` (category, e.g. `cat:eess.IV`). "
        "`days` cuts off results older than N days (`published` field). "
        "When `days` is set, results are sorted by submission date instead of relevance."
    ),
    annotations={
        "readOnlyHint": True,
        "destructiveHint": False,
        "openWorldHint": True,
        "idempotentHint": True,
    },
)
async def arxiv_search(
    query: str,
    days: int | None = None,
    max_results: int = 20,
    sort_by: str = "relevance",
    response_format: ResponseFormat = ResponseFormat.MARKDOWN,
) -> str:
    ...  # full body reproduced in the handler listing below
```
- trends_mcp.py:427-477 (handler): the arxiv_search async function, the core handler. It validates input, builds the arXiv API query, fetches the Atom XML, optionally applies the client-side days cutoff, and formats results as markdown or JSON.

```python
async def arxiv_search(
    query: str,
    days: int | None = None,
    max_results: int = 20,
    sort_by: str = "relevance",
    response_format: ResponseFormat = ResponseFormat.MARKDOWN,
) -> str:
    try:
        args = ArxivSearchInput(
            query=query,
            days=days,
            max_results=max_results,
            sort_by=sort_by,
            response_format=response_format,
        )
        q = args.query if ":" in args.query else f"all:{args.query}"
        # When `days` is set, force submittedDate sort and over-fetch so the
        # client-side cutoff can still return up to max_results.
        effective_sort = "submittedDate" if args.days else args.sort_by
        fetch_n = min(args.max_results * (5 if args.days else 1), 200)
        params: dict[str, Any] = {
            "search_query": q,
            "start": 0,
            "max_results": fetch_n,
        }
        if effective_sort != "relevance":
            params["sortBy"] = effective_sort
            params["sortOrder"] = "descending"
        ttl = TTL_STATIC if args.sort_by == "relevance" and not args.days else TTL_DEFAULT
        text = await _http_get_text(ARXIV_API, params=params, ttl=ttl)
        papers = _parse_arxiv_atom(text)
        if args.days:
            cutoff = _utc_now() - timedelta(days=args.days)
            kept: list[dict[str, Any]] = []
            for p in papers:
                try:
                    pub_dt = datetime.fromisoformat(p["published"].replace("Z", "+00:00"))
                except ValueError:
                    continue
                if pub_dt >= cutoff:
                    kept.append(p)
                if len(kept) >= args.max_results:
                    break
            papers = kept
        else:
            papers = papers[: args.max_results]
        # Header strings are Korean: "최근 N일" = "last N days", "N건" = "N results".
        suffix = f" · 최근 {args.days}일" if args.days else ""
        header = f"arXiv 검색 `{args.query}`{suffix} ({len(papers)}건)"
        return _format(papers, args.response_format, render_md=lambda x: _render_arxiv_md(x, header))
    except Exception as e:
        return _handle_error(e, "arxiv_search")
```
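The over-fetch arithmetic deserves a concrete example: with `days` set, the handler requests five times the caller's limit (capped at 200) so the date cutoff still has enough candidates to fill `max_results`.

```python
# days set, max_results=20: fetch 100, filter by published date, keep <= 20
min(20 * 5, 200)  # 100

# days set, max_results=50: 5x would be 250, clamped to the 200 cap
min(50 * 5, 200)  # 200

# days not set: fetch exactly max_results
min(20 * 1, 200)  # 20
```

Note also the cache choice: undated relevance queries are stable, so they get the longer `TTL_STATIC`; anything date-sensitive falls back to `TTL_DEFAULT`.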
- trends_mcp.py:97-106 (helper): _maybe_tool decorator. Conditionally registers a tool with FastMCP only if its source ("arxiv" here) is in ENABLED_SOURCES; otherwise the function stays defined for internal reuse but is not exposed via MCP.

```python
def _maybe_tool(*, source: str, **tool_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Register the tool only if its source is enabled.

    Otherwise leave the function defined (for internal reuse) but don't
    expose it via MCP.
    """

    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        if source in ENABLED_SOURCES:
            return mcp.tool(**tool_kwargs)(fn)
        return fn

    return decorator
```
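A sketch of the gating effect, assuming `ENABLED_SOURCES` is a set of source names (the `demo_*` tools and the "hn" source are hypothetical, for illustration only):

```python
ENABLED_SOURCES = {"arxiv"}  # hypothetical configuration

@_maybe_tool(source="arxiv", name="demo_enabled")
async def demo_enabled() -> str:
    return "registered with FastMCP and exposed to MCP clients"

@_maybe_tool(source="hn", name="demo_disabled")
async def demo_disabled() -> str:
    # Still an ordinary coroutine function, callable from other code in
    # the module; it just never becomes an MCP tool.
    return "defined but not exposed"
```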
- trends_mcp.py:299-330 (helper): _parse_arxiv_atom. Parses the arXiv API's Atom XML feed into a list of paper dicts with id, url, title, summary, published, updated, authors, and primary_category fields.

```python
def _parse_arxiv_atom(xml_text: str) -> list[dict[str, Any]]:
    root = ET.fromstring(xml_text)
    out: list[dict[str, Any]] = []
    for entry in root.findall("atom:entry", ATOM_NS):
        eid = (entry.findtext("atom:id", default="", namespaces=ATOM_NS) or "").strip()
        title = (entry.findtext("atom:title", default="", namespaces=ATOM_NS) or "").strip()
        summary = (entry.findtext("atom:summary", default="", namespaces=ATOM_NS) or "").strip()
        published = (entry.findtext("atom:published", default="", namespaces=ATOM_NS) or "").strip()
        updated = (entry.findtext("atom:updated", default="", namespaces=ATOM_NS) or "").strip()
        authors = [
            (a.findtext("atom:name", default="", namespaces=ATOM_NS) or "").strip()
            for a in entry.findall("atom:author", ATOM_NS)
        ]
        cats = [
            c.attrib.get("term", "")
            for c in entry.findall("{http://arxiv.org/schemas/atom}primary_category")
        ]
        # Extract arXiv id from URL like http://arxiv.org/abs/2604.12345v1
        arxiv_id = eid.rsplit("/", 1)[-1] if eid else ""
        out.append(
            {
                "id": arxiv_id,
                "url": eid,
                "title": title,
                "summary": summary,
                "published": published,
                "updated": updated,
                "authors": authors,
                "primary_category": cats[0] if cats else "",
            }
        )
    return out
```
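A minimal round-trip through the parser, assuming `ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}` (which the `atom:` prefixed lookups imply):

```python
feed = """\
<feed xmlns="http://www.w3.org/2005/Atom"
      xmlns:arxiv="http://arxiv.org/schemas/atom">
  <entry>
    <id>http://arxiv.org/abs/2401.00001v1</id>
    <title>An Example Paper</title>
    <summary>One-sentence abstract.</summary>
    <published>2024-01-01T00:00:00Z</published>
    <updated>2024-01-02T00:00:00Z</updated>
    <author><name>A. Author</name></author>
    <arxiv:primary_category term="cs.LG"/>
  </entry>
</feed>"""

papers = _parse_arxiv_atom(feed)
print(papers[0]["id"], papers[0]["primary_category"])  # 2401.00001v1 cs.LG
```

Note the `rsplit("/", 1)` trick: the Atom `<id>` is the abstract URL, so its last path segment is the versioned arXiv identifier.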