Skip to main content
Glama
emi-dm

ArxivSearcher MCP Server

by emi-dm

find_related_papers

Discover academic papers related to a specific research title by analyzing keyword similarity, with options to filter results and set relevance thresholds.

Instructions

Find papers related to a given paper title using keyword similarity.

:param paper_title: Title of the reference paper
:param max_results: Maximum number of related papers to return
:param similarity_threshold: Minimum similarity score (0.0 to 1.0)
:param category: Optional category filter

Input Schema

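Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| paper_title | Yes | Title of the reference paper | |
| max_results | No | Maximum number of related papers to return | 10 |
| similarity_threshold | No | Minimum similarity score (0.0 to 1.0) | 0.7 |
| category | No | Optional category filter | None |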

Implementation Reference

  • Handler function implementing the 'find_related_papers' tool. Extracts keywords from input paper title, constructs arXiv search query, retrieves candidate papers, filters by keyword overlap similarity threshold, sorts and returns top related papers.
    @mcp.tool
    def find_related_papers(
        paper_title: str,
        max_results: int = 10,
        similarity_threshold: float = 0.7,
        category: str | None = None,
    ) -> dict:
        """
        Find papers related to a given paper title using keyword similarity.

        :param paper_title: Title of the reference paper
        :param max_results: Maximum number of related papers to return
        :param similarity_threshold: Minimum similarity score (0.0 to 1.0)
        :param category: Optional category filter
        :return: Dict with the keywords used and the matching papers, or a
            dict with a single "error" key on failure
        """
        try:
            # A non-positive limit would make the final slice misbehave
            # (a negative value drops trailing items instead of limiting).
            if max_results < 1:
                return {"error": "max_results must be a positive integer"}

            # Extract keywords from the title
            stop_words = {
                "a",
                "an",
                "and",
                "the",
                "of",
                "in",
                "for",
                "to",
                "with",
                "on",
                "is",
                "are",
                "was",
                "were",
                "it",
            }

            # dict.fromkeys removes repeated words while keeping title order,
            # so a repeated word is neither counted twice in the similarity
            # ratio nor duplicated in the search query.
            keywords = list(
                dict.fromkeys(
                    word.lower()
                    for word in re.findall(r"\b\w+\b", paper_title)
                    if word.lower() not in stop_words and len(word) > 2
                )
            )

            if not keywords:
                return {"error": "No meaningful keywords found in title"}

            # Create search query from keywords: any keyword may appear in
            # either the title or the abstract.
            keyword_query = " OR ".join(f'(ti:"{kw}" OR abs:"{kw}")' for kw in keywords)
            query_parts = [f"({keyword_query})"]

            if category:
                query_parts.append(f"cat:{category}")

            final_query = " AND ".join(query_parts)

            # Search for related papers
            search = arxiv.Search(
                query=final_query,
                max_results=max_results * 2,  # Get more results to filter by similarity
                sort_by=arxiv.SortCriterion.Relevance,
                sort_order=arxiv.SortOrder.Descending,
            )

            results = []

            for r in search.results():
                # Calculate simple similarity based on keyword overlap:
                # the fraction of keywords found (as substrings) in the
                # lower-cased title + abstract.
                paper_text = f"{r.title} {r.summary}".lower()
                matches = sum(1 for kw in keywords if kw in paper_text)
                similarity = matches / len(keywords)  # keywords is non-empty here

                if similarity >= similarity_threshold:
                    results.append(
                        {
                            "title": r.title,
                            "authors": [a.name for a in r.authors],
                            "summary": r.summary[:500] + "..."
                            if len(r.summary) > 500
                            else r.summary,
                            "pdf_url": r.pdf_url,
                            "published_date": r.published.strftime("%Y-%m-%d"),
                            "similarity_score": round(similarity, 3),
                            "arxiv_id": r.entry_id.split("/")[-1],
                        }
                    )

            # Sort by similarity score and limit results. The sort is stable,
            # so papers with equal scores keep the order arXiv returned.
            results.sort(key=lambda x: x["similarity_score"], reverse=True)
            results = results[:max_results]

            return {
                "reference_title": paper_title,
                "keywords_used": keywords,
                "similarity_threshold": similarity_threshold,
                "total_related_found": len(results),
                "related_papers": results,
            }

        except Exception as e:
            # Tool boundary: report any failure to the caller as an error dict.
            return {"error": f"Failed to find related papers: {str(e)}"}
  • Async handler function implementing the 'find_related_papers' tool in the remote version. Identical logic to the sync version, extracts keywords from input paper title, searches arXiv, filters by similarity threshold, and returns related papers.
    @mcp.tool
    async def find_related_papers(
        paper_title: str,
        max_results: int = 10,
        similarity_threshold: float = 0.7,
        category: str | None = None,
    ) -> dict:
        """
        Find papers related to a given paper title using keyword similarity.

        :param paper_title: Title of the reference paper
        :param max_results: Maximum number of related papers to return
        :param similarity_threshold: Minimum similarity score (0.0 to 1.0)
        :param category: Optional category filter
        :return: Dict with the keywords used and the matching papers, or a
            dict with a single "error" key on failure
        """
        import asyncio

        try:
            # A non-positive limit would make the final slice misbehave
            # (a negative value drops trailing items instead of limiting).
            if max_results < 1:
                return {"error": "max_results must be a positive integer"}

            # Extract keywords from the title
            stop_words = {
                "a",
                "an",
                "and",
                "the",
                "of",
                "in",
                "for",
                "to",
                "with",
                "on",
                "is",
                "are",
                "was",
                "were",
                "it",
            }

            # dict.fromkeys removes repeated words while keeping title order,
            # so a repeated word is neither counted twice in the similarity
            # ratio nor duplicated in the search query.
            keywords = list(
                dict.fromkeys(
                    word.lower()
                    for word in re.findall(r"\b\w+\b", paper_title)
                    if word.lower() not in stop_words and len(word) > 2
                )
            )

            if not keywords:
                return {"error": "No meaningful keywords found in title"}

            # Create search query from keywords: any keyword may appear in
            # either the title or the abstract.
            keyword_query = " OR ".join(f'(ti:"{kw}" OR abs:"{kw}")' for kw in keywords)
            query_parts = [f"({keyword_query})"]

            if category:
                query_parts.append(f"cat:{category}")

            final_query = " AND ".join(query_parts)

            # Search for related papers
            search = arxiv.Search(
                query=final_query,
                max_results=max_results * 2,  # Get more results to filter by similarity
                sort_by=arxiv.SortCriterion.Relevance,
                sort_order=arxiv.SortOrder.Descending,
            )

            # Iterating the search performs blocking work; run it in a worker
            # thread so the event loop is not stalled while fetching.
            candidates = await asyncio.to_thread(lambda: list(search.results()))

            results = []

            for r in candidates:
                # Calculate simple similarity based on keyword overlap:
                # the fraction of keywords found (as substrings) in the
                # lower-cased title + abstract.
                paper_text = f"{r.title} {r.summary}".lower()
                matches = sum(1 for kw in keywords if kw in paper_text)
                similarity = matches / len(keywords)  # keywords is non-empty here

                if similarity >= similarity_threshold:
                    results.append(
                        {
                            "title": r.title,
                            "authors": [a.name for a in r.authors],
                            "summary": r.summary[:500] + "..."
                            if len(r.summary) > 500
                            else r.summary,
                            "pdf_url": r.pdf_url,
                            "published_date": r.published.strftime("%Y-%m-%d"),
                            "similarity_score": round(similarity, 3),
                            "arxiv_id": r.entry_id.split("/")[-1],
                        }
                    )

            # Sort by similarity score and limit results. The sort is stable,
            # so papers with equal scores keep the order arXiv returned.
            results.sort(key=lambda x: x["similarity_score"], reverse=True)
            results = results[:max_results]

            return {
                "reference_title": paper_title,
                "keywords_used": keywords,
                "similarity_threshold": similarity_threshold,
                "total_related_found": len(results),
                "related_papers": results,
            }

        except Exception as e:
            # Tool boundary: report any failure to the caller as an error dict.
            return {"error": f"Failed to find related papers: {str(e)}"}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/emi-dm/Arxiv-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.