Skip to main content
Glama
nanyang12138

AI Research MCP Server

by nanyang12138

search_latest_papers

Find recent AI/ML research papers by searching arXiv, Papers with Code, and Hugging Face using keywords and date filters.

Instructions

Search for latest AI/ML research papers from multiple sources (arXiv, Papers with Code, Hugging Face)

Input Schema

Table / JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| keywords | No | Keywords to search for (e.g., ['LLM', 'multimodal']) | — |
| days | No | Number of days to look back (1-30) | 7 |
| sources | No | Data sources to search (default: all) | all |
| max_results | No | Maximum number of results per source | 20 |

Implementation Reference

  • The primary handler function that executes the search_latest_papers tool. It queries arXiv, PapersWithCode, and HuggingFace based on input parameters, uses caching, aggregates results, and formats them using _format_papers.
    async def _search_latest_papers(
        self,
        keywords: Optional[List[str]] = None,
        days: int = 7,
        sources: Optional[List[str]] = None,
        max_results: int = 20,
    ) -> str:
        """Search for latest papers across the configured sources.

        Args:
            keywords: Search terms (e.g. ["LLM", "multimodal"]); None means
                no keyword filter for the sources that support one.
            days: Look-back window in days.
            sources: Subset of {"arxiv", "papers_with_code", "huggingface"};
                None searches all three.
            max_results: Per-source cap on returned papers.

        Returns:
            Markdown produced by ``self._format_papers`` for the merged
            (possibly cached) results of every selected source.
        """
        if sources is None:
            sources = ["arxiv", "papers_with_code", "huggingface"]
        
        # Normalize the keyword portion of cache keys so ['a', 'b'] and
        # ['b', 'a'] share an entry; include max_results so a larger request
        # is never served from a smaller cached result set.
        kw_part = ",".join(sorted(keywords)) if keywords else ""
        
        all_papers = []
        
        # Search arXiv
        if "arxiv" in sources:
            cache_key = f"arxiv_{kw_part}_{days}_{max_results}"
            cached = self.cache.get(cache_key, self.cache_expiry["arxiv"])
            if cached:
                all_papers.extend(cached)
            else:
                # arXiv client is synchronous; run it off the event loop.
                papers = await asyncio.to_thread(
                    self.arxiv.search_papers,
                    keywords=keywords,
                    days=days,
                    max_results=max_results,
                )
                self.cache.set(cache_key, papers)
                all_papers.extend(papers)
        
        # Search Papers with Code
        if "papers_with_code" in sources:
            cache_key = f"pwc_{kw_part}_{days}_{max_results}"
            # FIX: previously reused the arXiv expiry here (copy-paste).
            # Fall back to it only when no dedicated entry exists, so
            # configurations without a "papers_with_code" key still work.
            expiry = self.cache_expiry.get(
                "papers_with_code", self.cache_expiry["arxiv"]
            )
            cached = self.cache.get(cache_key, expiry)
            if cached:
                all_papers.extend(cached)
            else:
                papers = await asyncio.to_thread(
                    self.papers_with_code.get_latest_papers,
                    days=days,
                    items_per_page=max_results,
                )
                self.cache.set(cache_key, papers)
                all_papers.extend(papers)
        
        # Get Hugging Face daily papers
        if "huggingface" in sources:
            cache_key = f"hf_daily_{days}"
            # FIX: same copy-paste as above — prefer a dedicated expiry.
            expiry = self.cache_expiry.get(
                "huggingface", self.cache_expiry["arxiv"]
            )
            cached = self.cache.get(cache_key, expiry)
            if cached:
                all_papers.extend(cached)
            else:
                papers = await asyncio.to_thread(
                    self.huggingface.get_daily_papers,
                    # Window clamped to 7 days — presumably a limit of the
                    # daily-papers feed; TODO confirm against the client.
                    days=min(days, 7),
                )
                self.cache.set(cache_key, papers)
                all_papers.extend(papers)
        
        # Merge, dedupe, sort and render as markdown.
        return self._format_papers(all_papers, keywords)
  • Registers the search_latest_papers tool in the MCP server's list_tools handler, defining its name, description, and input schema.
    # Registration entry for the MCP list_tools handler: advertises the
    # search_latest_papers tool with its JSON-Schema input contract.
    # NOTE(review): no "required" list is declared, so every property is
    # optional; the defaults here mirror the handler's keyword defaults
    # (days=7, max_results=20, sources=None meaning "all").
    Tool(
        name="search_latest_papers",
        description="Search for latest AI/ML research papers from multiple sources (arXiv, Papers with Code, Hugging Face)",
        inputSchema={
            "type": "object",
            "properties": {
                # Free-form search terms, passed through to the arXiv query.
                "keywords": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Keywords to search for (e.g., ['LLM', 'multimodal'])",
                },
                # Look-back window; the handler clamps Hugging Face to 7 days.
                "days": {
                    "type": "integer",
                    "description": "Number of days to look back (1-30)",
                    "default": 7,
                },
                # Restricts which backends are queried; omitting it means all.
                "sources": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["arxiv", "papers_with_code", "huggingface"],
                    },
                    "description": "Data sources to search (default: all)",
                },
                # Per-source cap, not a global cap on the merged result list.
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results per source",
                    "default": 20,
                },
            },
        },
    ),
  • Defines the JSON schema for input validation of the search_latest_papers tool parameters.
    # JSON Schema validating the arguments of search_latest_papers.
    # NOTE(review): no "required" list — all four properties are optional,
    # and "keywords"/"sources" carry no schema default (the handler supplies
    # None-handling for both).
    inputSchema={
        "type": "object",
        "properties": {
            # Free-form search terms forwarded to the source clients.
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Keywords to search for (e.g., ['LLM', 'multimodal'])",
            },
            # Look-back window in days.
            "days": {
                "type": "integer",
                "description": "Number of days to look back (1-30)",
                "default": 7,
            },
            # Closed set of supported backends; omit to query them all.
            "sources": {
                "type": "array",
                "items": {
                    "type": "string",
                    "enum": ["arxiv", "papers_with_code", "huggingface"],
                },
                "description": "Data sources to search (default: all)",
            },
            # Cap applied per source, before merging.
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results per source",
                "default": 20,
            },
        },
    },
  • Tool dispatch logic in the generic call_tool handler that routes calls to search_latest_papers to the specific implementation.
    if name == "search_latest_papers":
        result = await self._search_latest_papers(**arguments)
    elif name == "search_github_repos":
  • Helper function called by the handler to format the aggregated papers list into a markdown string with deduplication, sorting by date, and rich details.
    def _format_papers(self, papers: List[Dict], keywords: Optional[List[str]] = None) -> str:
        """Format papers as markdown."""
        if not papers:
            return "*No papers found.*"
        
        # Deduplicate by title
        seen_titles = set()
        unique_papers = []
        for paper in papers:
            title = paper.get("title", "").lower()
            if title and title not in seen_titles:
                seen_titles.add(title)
                unique_papers.append(paper)
        
        # Sort by date (most recent first)
        def get_date(paper):
            date_str = paper.get("published") or paper.get("updated") or ""
            try:
                dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
                # Ensure timezone-aware
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=timezone.utc)
                return dt
            except:
                return datetime.min.replace(tzinfo=timezone.utc)
        
        unique_papers.sort(key=get_date, reverse=True)
        
        lines = []
        for i, paper in enumerate(unique_papers, 1):
            title = paper.get("title", "Untitled")
            authors = paper.get("authors", [])
            author_str = authors[0] if authors else "Unknown"
            if len(authors) > 1:
                author_str += " et al."
            
            url = paper.get("url", "")
            source = paper.get("source", "unknown")
            published = paper.get("published", "")[:10]  # Just the date
            
            lines.append(f"### {i}. [{title}]({url})")
            lines.append(f"*{author_str} • {published} • {source}*")
            
            # Add summary/abstract (truncated)
            summary = paper.get("summary") or paper.get("abstract", "")
            if summary:
                summary = summary.replace("\n", " ")[:200] + "..."
                lines.append(f"\n{summary}")
            
            # Add GitHub link if available
            github_url = paper.get("github_url")
            if github_url:
                stars = paper.get("stars", 0)
                lines.append(f"\n💻 [Code]({github_url}) {'⭐ ' + str(stars) if stars > 0 else ''}")
            
            lines.append("")  # Empty line between papers
        
        return "\n".join(lines)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nanyang12138/AI-Research-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.