get_paper_data

Instructions

Get detailed information about a specific paper including abstract and available formats.

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`paper_id`	Yes	arXiv paper ID (e.g., "2103.08220")	—

Implementation Reference

mcp_simple_arxiv/server.py:82-131 (handler)
The MCP tool handler for 'get_paper_data', registered via the @app.tool decorator with annotations. It takes paper_id (a string), fetches the paper using ArxivClient, and returns the formatted paper details as a string.
@app.tool(
    annotations={
        "title": "Get arXiv Paper Data",
        "readOnlyHint": True,
        "openWorldHint": True
    }
)
async def get_paper_data(paper_id: str) -> str:
    """Get detailed information about a specific paper including abstract and available formats."""
    # NOTE: the docstring above is left untouched on purpose; it matches the
    # tool's published instructions and is presumably used as the tool
    # description by the framework.
    #
    # Fetches the paper dictionary from arxiv_client and renders it as plain
    # text in four sections: title, metadata, abstract, access options, plus
    # an optional "Additional Information" section. Any exception raised by
    # arxiv_client.get_paper propagates to the caller unchanged.
    paper = await arxiv_client.get_paper(paper_id)

    # Lines are collected and joined once at the end; an empty string in the
    # list produces the blank line that separates sections.
    lines = [
        f"Title: {paper['title']}",
        "",
        "Metadata:",
        f"- Authors: {', '.join(paper['authors'])}",
        f"- Published: {paper['published']}",
        f"- Last Updated: {paper['updated']}",
    ]

    # Join the category fragments so that a missing primary category does not
    # leave a dangling ", " in front of the additional categories.
    category_parts = []
    if paper['primary_category']:
        category_parts.append(f"Primary: {paper['primary_category']}")
    if paper['categories']:
        category_parts.append(f"Additional: {', '.join(paper['categories'])}")
    lines.append("- Categories: " + ", ".join(category_parts))

    if paper['doi']:
        lines.append(f"- DOI: {paper['doi']}")
    if paper["journal_ref"]:
        lines.append(f"- Journal Reference: {paper['journal_ref']}")

    # Abstract section
    lines += ["", "Abstract:", paper["summary"]]

    # Access options section. The parser leaves abstract_url / pdf_url as None
    # when the feed entry has no matching link, so each URL is only emitted
    # when present instead of concatenating None onto a string.
    lines += ["", "Access Options:"]
    if paper["abstract_url"]:
        lines.append(f"- Abstract page: {paper['abstract_url']}")
    if paper["html_url"]:  # Add HTML version if available
        lines.append(f"- Full text HTML version: {paper['html_url']}")
    if paper["pdf_url"]:
        lines.append(f"- PDF version: {paper['pdf_url']}")

    # Additional information section (only when the paper carries a comment)
    if paper["comment"]:
        lines += ["", "Additional Information:", f"- Comment: {paper['comment']}"]

    return "\n".join(lines) + "\n"
mcp_simple_arxiv/arxiv_client.py:165-206 (helper)
Helper method in ArxivClient class that queries the arXiv API by paper_id, parses the Atom feed response, and returns structured paper metadata used by the tool handler.
async def get_paper(self, paper_id: str) -> Dict[str, Any]:
    """
    Get detailed information about a specific paper.

    Awaits ``self._wait_for_rate_limit()``, then queries ``self.base_url``
    with the given ID and hands the first feed entry to
    ``self._parse_entry``.

    Args:
        paper_id: arXiv paper ID (e.g., "2103.08220")

    Returns:
        Dictionary containing paper metadata, including:
        - Basic metadata (title, authors, dates)
        - Categories (primary and others)
        - Abstract and comments
        - URLs (abstract page, PDF version, HTML version if available)
        - DOI if available

    Raises:
        ValueError: If the HTTP request fails (the original httpx error is
            attached as ``__cause__``), if the response cannot be read as
            a feed, or if the feed contains no entry for ``paper_id``.
    """
    await self._wait_for_rate_limit()

    params = {
        "id_list": paper_id,
        "max_results": 1
    }

    async with httpx.AsyncClient(timeout=20.0) as client:
        # Only the network calls can raise httpx.HTTPError, so the try body
        # is limited to them.
        try:
            response = await client.get(self.base_url, params=params)
            response.raise_for_status()
        except httpx.HTTPError as e:
            logger.error("HTTP error while fetching paper: %s", e)
            # Chain the cause so the underlying transport/status error is
            # still visible in tracebacks.
            raise ValueError(f"arXiv API HTTP error: {e}") from e

    feed = feedparser.parse(response.text)

    if not isinstance(feed, dict) or 'entries' not in feed:
        logger.error("Invalid response from arXiv API")
        logger.debug("Response text: %s...", response.text[:1000])
        raise ValueError("Invalid response from arXiv API")

    entries = feed.get('entries')
    if not entries:
        raise ValueError(f"Paper not found: {paper_id}")

    return self._parse_entry(entries[0])
mcp_simple_arxiv/arxiv_client.py:49-112 (helper)
Supporting helper that parses an individual arXiv Atom feed entry into the standardized paper dictionary format used by get_paper.
def _parse_entry(self, entry: Dict[str, Any]) -> Dict[str, Any]:
    """Build the paper dictionary for a single feed entry.

    Reads links, id, authors, categories and the text fields from ``entry``
    and returns a dict with the keys ``id``, ``title``, ``authors``,
    ``primary_category``, ``categories``, ``published``, ``updated``,
    ``summary``, ``comment``, ``journal_ref``, ``doi``, ``pdf_url``,
    ``abstract_url`` and ``html_url``. Fields absent from the entry come
    back as empty strings, empty lists or ``None``.
    """
    absent = object()

    def lookup(item, key):
        # Dict-style access first, attribute access second; ``absent`` when
        # the item offers neither.
        if isinstance(item, dict) and key in item:
            return item[key]
        if hasattr(item, key):
            return getattr(item, key)
        return absent

    # Links: the PDF link and the HTML link (the abstract page). When a
    # type occurs more than once, the last occurrence wins.
    pdf_url = None
    abstract_url = None
    for link in entry.get('links', []):
        if not isinstance(link, dict):
            continue
        link_type = link.get('type')
        if link_type == 'application/pdf':
            pdf_url = link.get('href')
        elif link_type == 'text/html':
            abstract_url = link.get('href')

    # Paper ID is whatever follows "/abs/" in the entry id.
    raw_id = entry.get('id', '')
    paper_id = raw_id.split("/abs/")[-1].rstrip()

    # HTML version URL only makes sense when an ID could be extracted.
    html_url = None
    if paper_id:
        html_url = self._get_html_url(paper_id)

    # Author names, skipping anything without a name.
    author_names = (lookup(person, 'name') for person in entry.get('authors', []))
    authors = [name for name in author_names if name is not absent]

    # Primary category term, if the entry declares one.
    primary_category = None
    if 'arxiv_primary_category' in entry:
        primary = entry['arxiv_primary_category']
        if isinstance(primary, dict):
            primary_category = primary.get('term')
        elif hasattr(primary, 'term'):
            primary_category = primary.term

    # All tag terms, minus (the first occurrence of) the primary category.
    tag_terms = (lookup(tag, 'term') for tag in entry.get('tags', []))
    categories = [term for term in tag_terms if term is not absent]
    if primary_category and primary_category in categories:
        categories.remove(primary_category)

    paper = {"id": paper_id}
    paper["title"] = self._clean_text(entry.get('title', ''))
    paper["authors"] = authors
    paper["primary_category"] = primary_category
    paper["categories"] = categories
    paper["published"] = entry.get('published', '')
    paper["updated"] = entry.get('updated', '')
    paper["summary"] = self._clean_text(entry.get('summary', ''))
    paper["comment"] = self._clean_text(entry.get('arxiv_comment', ''))
    paper["journal_ref"] = entry.get('arxiv_journal_ref', '')
    paper["doi"] = entry.get('arxiv_doi', '')
    paper["pdf_url"] = pdf_url
    paper["abstract_url"] = abstract_url  # URL to abstract page
    paper["html_url"] = html_url  # URL to HTML version if available
    return paper

mcp-simple-arxiv

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API