# search_latest_papers
Find recent AI/ML research papers using keywords, date ranges, and multiple academic sources to stay current with developments in the field.
## Instructions
Search for the latest AI/ML research papers from multiple sources (arXiv, Papers with Code, Hugging Face).
## Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| keywords | No | Keywords to search for (e.g., `['LLM', 'multimodal']`) | |
| days | No | Number of days to look back (1-30) | 7 |
| sources | No | Data sources to search: `arxiv`, `papers_with_code`, `huggingface` | all |
| max_results | No | Maximum number of results per source | 20 |
## Input Schema (JSON Schema)
```json
{
  "properties": {
    "days": {
      "default": 7,
      "description": "Number of days to look back (1-30)",
      "type": "integer"
    },
    "keywords": {
      "description": "Keywords to search for (e.g., ['LLM', 'multimodal'])",
      "items": {
        "type": "string"
      },
      "type": "array"
    },
    "max_results": {
      "default": 20,
      "description": "Maximum number of results per source",
      "type": "integer"
    },
    "sources": {
      "description": "Data sources to search (default: all)",
      "items": {
        "enum": [
          "arxiv",
          "papers_with_code",
          "huggingface"
        ],
        "type": "string"
      },
      "type": "array"
    }
  },
  "type": "object"
}
```
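For illustration, here is a hypothetical argument payload that conforms to the schema above, checked with the `jsonschema` package. The package, the variable names, and the sample values are assumptions for this sketch, not part of the project.

```python
from jsonschema import validate

# The tool's input schema, as shown above.
SCHEMA = {
    "properties": {
        "days": {"default": 7, "type": "integer"},
        "keywords": {"items": {"type": "string"}, "type": "array"},
        "max_results": {"default": 20, "type": "integer"},
        "sources": {
            "items": {
                "enum": ["arxiv", "papers_with_code", "huggingface"],
                "type": "string",
            },
            "type": "array",
        },
    },
    "type": "object",
}

# Hypothetical call: diffusion-model papers from the last 14 days, limited to
# arXiv and Hugging Face, at most 10 results per source.
arguments = {
    "keywords": ["diffusion", "image generation"],
    "days": 14,
    "sources": ["arxiv", "huggingface"],
    "max_results": 10,
}

validate(instance=arguments, schema=SCHEMA)  # raises ValidationError on mismatch
print("arguments conform to the schema")
```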
## Implementation Reference
- `src/ai_research_mcp/server.py:334-393` (handler) — The core handler that executes the `search_latest_papers` tool. It searches the selected sources (arXiv, Papers with Code, Hugging Face) for recent papers matching the keywords, applies per-source caching, aggregates the results, and formats them with `_format_papers`.

```python
async def _search_latest_papers(
    self,
    keywords: Optional[List[str]] = None,
    days: int = 7,
    sources: Optional[List[str]] = None,
    max_results: int = 20,
) -> str:
    """Search for latest papers."""
    if sources is None:
        sources = ["arxiv", "papers_with_code", "huggingface"]

    all_papers = []

    # Search arXiv
    if "arxiv" in sources:
        cache_key = f"arxiv_{keywords}_{days}"
        cached = self.cache.get(cache_key, self.cache_expiry["arxiv"])
        if cached:
            all_papers.extend(cached)
        else:
            papers = await asyncio.to_thread(
                self.arxiv.search_papers,
                keywords=keywords,
                days=days,
                max_results=max_results,
            )
            self.cache.set(cache_key, papers)
            all_papers.extend(papers)

    # Search Papers with Code
    if "papers_with_code" in sources:
        cache_key = f"pwc_{keywords}_{days}"
        cached = self.cache.get(cache_key, self.cache_expiry["arxiv"])
        if cached:
            all_papers.extend(cached)
        else:
            papers = await asyncio.to_thread(
                self.papers_with_code.get_latest_papers,
                days=days,
                items_per_page=max_results,
            )
            self.cache.set(cache_key, papers)
            all_papers.extend(papers)

    # Get Hugging Face daily papers
    if "huggingface" in sources:
        cache_key = f"hf_daily_{days}"
        cached = self.cache.get(cache_key, self.cache_expiry["arxiv"])
        if cached:
            all_papers.extend(cached)
        else:
            papers = await asyncio.to_thread(
                self.huggingface.get_daily_papers,
                days=min(days, 7),
            )
            self.cache.set(cache_key, papers)
            all_papers.extend(papers)

    # Format results
    return self._format_papers(all_papers, keywords)
```
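The cache object itself is not part of this excerpt; note that all three branches pass `self.cache_expiry["arxiv"]` as the maximum age, so the arXiv TTL is reused for every source. A minimal in-memory sketch consistent with the `get(key, max_age)` / `set(key, value)` calls above might look like this (the class name and details are assumptions, not the project's actual implementation):

```python
import time
from typing import Any, Dict, Optional, Tuple

class TTLCache:
    """Minimal TTL cache matching the handler's get/set usage (a sketch only)."""

    def __init__(self) -> None:
        # key -> (timestamp when stored, cached value)
        self._store: Dict[str, Tuple[float, Any]] = {}

    def get(self, key: str, max_age_seconds: float) -> Optional[Any]:
        # Return the cached value only if it is younger than max_age_seconds.
        entry = self._store.get(key)
        if entry is None:
            return None
        stored_at, value = entry
        if time.time() - stored_at > max_age_seconds:
            del self._store[key]  # expired; drop it
            return None
        return value

    def set(self, key: str, value: Any) -> None:
        self._store[key] = (time.time(), value)
```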
- `src/ai_research_mcp/server.py:55-87` (registration) — Registers the `search_latest_papers` tool with the MCP server in the `list_tools` handler, defining its name, description, and input schema.

```python
Tool(
    name="search_latest_papers",
    description="Search for latest AI/ML research papers from multiple sources (arXiv, Papers with Code, Hugging Face)",
    inputSchema={
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Keywords to search for (e.g., ['LLM', 'multimodal'])",
            },
            "days": {
                "type": "integer",
                "description": "Number of days to look back (1-30)",
                "default": 7,
            },
            "sources": {
                "type": "array",
                "items": {
                    "type": "string",
                    "enum": ["arxiv", "papers_with_code", "huggingface"],
                },
                "description": "Data sources to search (default: all)",
            },
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results per source",
                "default": 20,
            },
        },
    },
),
```
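In the MCP Python SDK, `Tool` entries like the one above are typically returned from a decorated `list_tools` handler. A hedged sketch of that wiring follows; the server name and handler function are assumptions, and the project's actual setup may differ:

```python
from mcp.server import Server
from mcp.types import Tool

server = Server("ai-research-mcp")  # hypothetical server name

@server.list_tools()
async def handle_list_tools() -> list[Tool]:
    # The Tool(...) definition excerpted above would be one entry in this list,
    # alongside the server's other tools.
    return [
        Tool(
            name="search_latest_papers",
            description=(
                "Search for latest AI/ML research papers from multiple sources "
                "(arXiv, Papers with Code, Hugging Face)"
            ),
            inputSchema={"type": "object", "properties": {}},  # full schema shown above
        ),
    ]
```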
- `src/ai_research_mcp/server.py:58-85` (schema) — Defines the input schema for the `search_latest_papers` tool, specifying `keywords`, `days`, `sources`, and `max_results` with types and descriptions. This span sits inside the registration excerpt above, so the code is not repeated here.
- `_format_papers` (helper) — Helper function used by the handler to format the aggregated papers list into a markdown string, including deduplication by title, sorting by date, and rich display with links and summaries.

```python
def _format_papers(self, papers: List[Dict], keywords: Optional[List[str]] = None) -> str:
    """Format papers as markdown."""
    if not papers:
        return "*No papers found.*"

    # Deduplicate by title
    seen_titles = set()
    unique_papers = []
    for paper in papers:
        title = paper.get("title", "").lower()
        if title and title not in seen_titles:
            seen_titles.add(title)
            unique_papers.append(paper)

    # Sort by date (most recent first)
    def get_date(paper):
        date_str = paper.get("published") or paper.get("updated") or ""
        try:
            dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            # Ensure timezone-aware
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return dt
        except Exception:
            return datetime.min.replace(tzinfo=timezone.utc)

    unique_papers.sort(key=get_date, reverse=True)

    lines = []
    for i, paper in enumerate(unique_papers, 1):
        title = paper.get("title", "Untitled")
        authors = paper.get("authors", [])
        author_str = authors[0] if authors else "Unknown"
        if len(authors) > 1:
            author_str += " et al."
        url = paper.get("url", "")
        source = paper.get("source", "unknown")
        published = paper.get("published", "")[:10]  # Just the date

        lines.append(f"### {i}. [{title}]({url})")
        lines.append(f"*{author_str} • {published} • {source}*")

        # Add summary/abstract (truncated)
        summary = paper.get("summary") or paper.get("abstract", "")
        if summary:
            summary = summary.replace("\n", " ")[:200] + "..."
            lines.append(f"\n{summary}")

        # Add GitHub link if available
        github_url = paper.get("github_url")
        if github_url:
            stars = paper.get("stars", 0)
            lines.append(f"\n💻 [Code]({github_url}) {'⭐ ' + str(stars) if stars > 0 else ''}")

        lines.append("")  # Empty line between papers

    return "\n".join(lines)
```
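For reference, the paper dictionaries the formatter consumes look roughly like this. The field names mirror the `paper.get(...)` calls above; the sample titles, URLs, and values are made up purely for illustration:

```python
# Illustrative input records for _format_papers; values are placeholders.
sample_papers = [
    {
        "title": "Example Paper A",
        "authors": ["A. Author", "B. Author"],
        "url": "https://example.org/paper-a",
        "source": "arxiv",
        "published": "2024-05-01T12:00:00Z",
        "summary": "A short abstract used to demonstrate truncation behavior.",
        "github_url": "https://example.org/code-a",
        "stars": 42,
    },
    {
        "title": "Example Paper B",
        "authors": ["C. Author"],
        "url": "https://example.org/paper-b",
        "source": "huggingface",
        "published": "2024-05-03T09:30:00Z",
        "abstract": "Some sources carry 'abstract' instead of 'summary'.",
    },
]

# markdown = server._format_papers(sample_papers)
# Paper B would sort first (more recent 'published' date), and any entries with
# duplicate titles would be dropped before rendering.
```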