arxiv_client.py
"""arXiv client for searching AI/ML papers."""

from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional

import arxiv


class ArxivClient:
"""Client for searching arXiv papers."""
# arXiv categories for AI/ML research
AI_CATEGORIES = [
"cs.AI", # Artificial Intelligence
"cs.CL", # Computation and Language
"cs.LG", # Machine Learning
"cs.CV", # Computer Vision
"cs.NE", # Neural and Evolutionary Computing
"cs.RO", # Robotics
"cs.HC", # Human-Computer Interaction
"cs.IR", # Information Retrieval
"stat.ML", # Statistics - Machine Learning
"q-bio.QM", # Quantitative Methods
"q-bio.GN", # Genomics
"q-bio.BM", # Biomolecules
"physics.comp-ph", # Computational Physics
"eess.AS", # Audio and Speech Processing
"eess.IV", # Image and Video Processing
    ]

    # Keywords by AI research area
KEYWORDS_BY_AREA = {
"llm": ["large language model", "LLM", "GPT", "transformer", "BERT",
"instruction tuning", "prompt", "fine-tuning", "RLHF", "alignment"],
"vision": ["vision language model", "CLIP", "multimodal", "text-to-image",
"diffusion model", "stable diffusion", "image generation", "video generation"],
"generative": ["generative model", "GAN", "VAE", "flow-based model", "autoregressive model"],
"robotics": ["robot learning", "embodied AI", "manipulation", "navigation",
"imitation learning", "sim-to-real"],
"bioinfo": ["protein folding", "drug discovery", "molecule generation",
"genomics", "AlphaFold", "antibody design"],
"science": ["physics-informed neural network", "scientific machine learning",
"AI4Science", "molecular dynamics"],
"rl": ["reinforcement learning", "multi-agent", "policy gradient",
"Q-learning", "offline RL", "reward modeling"],
"graph": ["graph neural network", "knowledge graph", "molecular graph",
"graph representation learning"],
"efficient": ["model compression", "quantization", "pruning", "knowledge distillation",
"efficient transformer", "LoRA", "parameter-efficient fine-tuning"],
"safety": ["AI safety", "adversarial robustness", "interpretability",
"explainability", "fairness", "bias"],
"emerging": ["federated learning", "continual learning", "meta-learning",
"few-shot learning", "zero-shot learning", "neuromorphic computing"],
}
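
    # Example (illustrative): KEYWORDS_BY_AREA["llm"] is OR-joined by
    # search_papers() into a phrase query such as
    # ("large language model" OR "LLM" OR "GPT" OR ...).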

    def __init__(self):
        """Initialize arXiv client."""
        # arxiv.Client() also accepts page_size, delay_seconds, and
        # num_retries for tuning pagination and rate limiting; the
        # defaults are used here.
        self.client = arxiv.Client()

    def search_papers(
self,
keywords: Optional[List[str]] = None,
categories: Optional[List[str]] = None,
days: int = 7,
max_results: int = 50,
sort_by: arxiv.SortCriterion = arxiv.SortCriterion.SubmittedDate,
) -> List[Dict]:
"""Search for papers on arXiv.
Args:
keywords: List of keywords to search for (OR condition)
categories: List of arXiv categories to filter by (default: all AI categories)
days: Number of days to look back
max_results: Maximum number of results to return
sort_by: Sort criterion (SubmittedDate, Relevance, or LastUpdatedDate)
Returns:
List of paper dictionaries
"""
# Build query
query_parts = []
# Add keyword search
if keywords:
keyword_query = " OR ".join(f'"{kw}"' for kw in keywords)
query_parts.append(f"({keyword_query})")
# Add category filter
if categories is None:
categories = self.AI_CATEGORIES
if categories:
cat_query = " OR ".join(f"cat:{cat}" for cat in categories)
query_parts.append(f"({cat_query})")
        # Combine query parts; the "all" fallback is only reached when an
        # empty category list is passed explicitly, since categories
        # defaults to AI_CATEGORIES above
        query = " AND ".join(query_parts) if query_parts else "all"
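        # Illustrative example of a built query (derived from the logic
        # above): keywords=["LLM", "RLHF"] with categories=["cs.CL"] yields
        #   ("LLM" OR "RLHF") AND (cat:cs.CL)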
        # Calculate the cutoff as a timezone-aware UTC datetime
        date_from = datetime.now(timezone.utc) - timedelta(days=days)
# Search arXiv
search = arxiv.Search(
query=query,
max_results=max_results,
sort_by=sort_by,
sort_order=arxiv.SortOrder.Descending,
)
        results = []
        for paper in self.client.results(search):
            # Normalize to timezone-aware datetimes before comparing, so
            # the date filter cannot raise TypeError on naive values
            published_dt = paper.published
            if published_dt.tzinfo is None:
                published_dt = published_dt.replace(tzinfo=timezone.utc)
            updated_dt = paper.updated
            if updated_dt.tzinfo is None:
                updated_dt = updated_dt.replace(tzinfo=timezone.utc)
            # Skip papers outside the lookback window; `continue` rather
            # than `break` because results are only date-ordered when
            # sorting by SubmittedDate
            if published_dt < date_from:
                continue
results.append({
"title": paper.title,
"authors": [author.name for author in paper.authors],
"summary": paper.summary,
"published": published_dt.isoformat(),
"updated": updated_dt.isoformat(),
"url": paper.entry_id,
"pdf_url": paper.pdf_url,
"categories": paper.categories,
"primary_category": paper.primary_category,
"source": "arxiv",
})
return results

    def get_latest_by_area(self, area: str, days: int = 7, max_results: int = 20) -> List[Dict]:
        """Get latest papers for a specific research area.

        Args:
            area: Research area key (e.g., 'llm', 'vision', 'robotics').
            days: Number of days to look back.
            max_results: Maximum number of results.

        Returns:
            List of paper dictionaries.
        """
keywords = self.KEYWORDS_BY_AREA.get(area.lower(), [])
if not keywords:
raise ValueError(f"Unknown area: {area}. Valid areas: {list(self.KEYWORDS_BY_AREA.keys())}")
return self.search_papers(
keywords=keywords,
days=days,
max_results=max_results,
)

    def get_latest_papers(self, days: int = 7, max_results: int = 100) -> List[Dict]:
        """Get latest papers across all AI categories.

        Args:
            days: Number of days to look back.
            max_results: Maximum number of results.

        Returns:
            List of paper dictionaries.
        """
return self.search_papers(
keywords=None,
categories=self.AI_CATEGORIES,
days=days,
max_results=max_results,
)
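

# Minimal usage sketch (illustrative, not part of the client): assumes
# network access and the `arxiv` package installed. The area name,
# keyword, and result counts below are arbitrary example values.
if __name__ == "__main__":
    client = ArxivClient()

    # Recent LLM papers from the last 3 days
    for paper in client.get_latest_by_area("llm", days=3, max_results=5):
        print(f"{paper['published'][:10]}  {paper['title']}")

    # Custom search: diffusion-model papers in cs.CV from the last week
    papers = client.search_papers(
        keywords=["diffusion model"],
        categories=["cs.CV"],
        days=7,
        max_results=5,
    )
    print(f"Found {len(papers)} recent diffusion-model papers")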