arXiv MCP Server

arxiv-mcp-server
arxiv_mcp

api.py•4.97 KiB

"""Asynchronous client for the arXiv Atom query API."""

import logging
import re
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional
from urllib.parse import quote_plus

import httpx

from .models import Paper, SearchResult

logger = logging.getLogger(__name__)

# XML namespaces that appear in the arXiv Atom feed.
_ATOM_NS = {
    "atom": "http://www.w3.org/2005/Atom",
    "arxiv": "http://arxiv.org/schemas/atom",
}

# Trailing version suffix of an arXiv identifier, e.g. the "v12" in
# "2101.00001v12". Anchored at the end so a "v1" elsewhere is left untouched.
_VERSION_SUFFIX = re.compile(r"v\d+$")


class ArxivAPI:
    """arXiv API client.

    Owns a single ``httpx.AsyncClient``. Call :meth:`close` when finished, or
    use the instance as an ``async with`` context manager so the client is
    closed automatically.
    """

    def __init__(self, timeout: float = 30.0):
        """Create the client.

        Args:
            timeout: Timeout in seconds passed to ``httpx.AsyncClient``.
        """
        self.http_client = httpx.AsyncClient(timeout=timeout)
        self.base_url = "https://export.arxiv.org/api/query"

    async def __aenter__(self) -> "ArxivAPI":
        """Return ``self`` so the client can be used with ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the underlying HTTP client on context exit."""
        await self.close()

    async def search(
        self,
        query: str,
        max_results: int = 10,
        sort_by: str = "relevance"
    ) -> SearchResult:
        """Search arXiv for papers.

        Args:
            query: Search expression sent as ``search_query``.
            max_results: Maximum number of entries to request.
            sort_by: Value for the API's ``sortBy`` parameter.

        Returns:
            A ``SearchResult`` holding the parsed papers. ``total_results`` is
            the number of papers parsed from this response. On any failure the
            result is empty and ``error`` carries the exception text; this
            method does not raise.
        """
        try:
            # Every caller-supplied component is escaped so it cannot inject
            # extra query parameters. Results are always requested in
            # descending order: most relevant first for "relevance", newest
            # first for the date-based sorts.
            url = (
                f"{self.base_url}?search_query={quote_plus(query)}"
                f"&start=0&max_results={quote_plus(str(max_results))}"
                f"&sortBy={quote_plus(sort_by)}&sortOrder=descending"
            )

            response = await self.http_client.get(url)
            response.raise_for_status()

            papers = self._parse_response(response.content)

            return SearchResult(
                query=query,
                total_results=len(papers),
                papers=papers
            )

        except Exception as e:
            # Deliberate best-effort boundary: report the failure in the result.
            logger.error("Error searching arXiv: %s", e)
            return SearchResult(
                query=query,
                total_results=0,
                papers=[],
                error=str(e)
            )

    async def get_paper(self, arxiv_id: str) -> Optional[Paper]:
        """Get details for a specific paper.

        Args:
            arxiv_id: arXiv identifier, optionally prefixed with ``arXiv:``
                and/or carrying a version suffix such as ``v2``. The version
                suffix is removed before the lookup.

        Returns:
            The first paper in the response, or ``None`` if the identifier is
            empty, nothing was found, or the request failed.
        """
        try:
            clean_id = _VERSION_SUFFIX.sub(
                "", arxiv_id.strip().replace("arXiv:", "")
            )
            if not clean_id:
                return None

            # Keep "/" (old-style IDs such as "hep-th/9901001") and ","
            # readable; escape everything else.
            url = f"{self.base_url}?id_list={quote_plus(clean_id, safe='/,')}"

            response = await self.http_client.get(url)
            response.raise_for_status()

            papers = self._parse_response(response.content)
            return papers[0] if papers else None

        except Exception as e:
            logger.error("Error getting paper %s: %s", arxiv_id, e)
            return None

    def _parse_response(self, content: bytes) -> List[Paper]:
        """Parse arXiv XML response into Paper objects.

        Args:
            content: Raw Atom XML payload.

        Returns:
            One ``Paper`` per entry that could be parsed; an empty list if the
            payload cannot be processed.
        """
        try:
            root = ET.fromstring(content)
            papers = []

            for entry in root.findall('.//{http://www.w3.org/2005/Atom}entry'):
                paper_data = self._parse_paper_entry(entry)
                if paper_data:
                    papers.append(Paper.from_dict(paper_data))

            return papers

        except Exception as e:
            logger.error("Error parsing response: %s", e)
            return []

    @staticmethod
    def _element_text(elem: Optional[ET.Element], default: str = "") -> str:
        """Return the whitespace-normalized text of *elem*.

        Line breaks and runs of whitespace are collapsed to single spaces.
        *default* is returned when the element is missing or has no text.
        """
        if elem is None or not elem.text:
            return default
        return " ".join(elem.text.split()) or default

    def _parse_paper_entry(self, entry) -> Optional[Dict[str, Any]]:
        """Parse an arXiv entry XML element into a paper dictionary.

        Args:
            entry: An Atom ``entry`` element.

        Returns:
            A dict with keys ``id``, ``title``, ``authors``, ``abstract``,
            ``published``, ``categories``, ``arxiv_url`` and ``pdf_url``, or
            ``None`` if the entry could not be parsed.
        """
        try:
            ns = _ATOM_NS
            text_of = self._element_text

            # Extract basic information
            title = text_of(entry.find('atom:title', ns), "Unknown Title")

            # Extract authors, skipping author elements without a usable name
            authors = []
            for author in entry.findall('atom:author', ns):
                name = text_of(author.find('atom:name', ns))
                if name:
                    authors.append(name)

            # Extract abstract
            abstract = text_of(entry.find('atom:summary', ns))

            # Extract arXiv ID. Take everything after "/abs/" so old-style
            # identifiers keep their archive prefix ("hep-th/9901001v1");
            # fall back to the last path segment for any other URL shape.
            arxiv_url = text_of(entry.find('atom:id', ns))
            if "/abs/" in arxiv_url:
                arxiv_id = arxiv_url.split("/abs/", 1)[1]
            else:
                arxiv_id = arxiv_url.rsplit("/", 1)[-1]

            # Extract publication date (first 10 characters of the timestamp)
            published = text_of(entry.find('atom:published', ns))[:10]

            # Extract categories
            categories = [
                term
                for term in (
                    category.get('term')
                    for category in entry.findall('atom:category', ns)
                )
                if term
            ]

            # Find PDF link: the first link element typed application/pdf
            pdf_url = next(
                (
                    link.get('href', '')
                    for link in entry.findall('atom:link', ns)
                    if link.get('type') == 'application/pdf'
                ),
                "",
            )

            return {
                "id": arxiv_id,
                "title": title,
                "authors": authors,
                "abstract": abstract,
                "published": published,
                "categories": categories,
                "arxiv_url": arxiv_url,
                "pdf_url": pdf_url
            }

        except Exception as e:
            # Best-effort: a malformed entry is skipped, not fatal.
            logger.error("Error parsing paper entry: %s", e)
            return None

    async def close(self):
        """Close the HTTP client."""
        await self.http_client.aclose()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/1Dark134/arxiv-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

api.py•4.97 KiB