find_related_papers
Discover research papers related to a specific title using keyword similarity, filtered by category and similarity threshold, with customizable result limits.
Instructions
Find papers related to a given paper title using keyword similarity.
- :param paper_title: Title of the reference paper
- :param max_results: Maximum number of related papers to return
- :param similarity_threshold: Minimum similarity score (0.0 to 1.0)
- :param category: Optional category filter
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
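| category | No | Optional category filter | None |
| max_results | No | Maximum number of related papers to return | 10 |
| paper_title | Yes | Title of the reference paper | |
| similarity_threshold | No | Minimum similarity score (0.0 to 1.0) | 0.7 |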
Implementation Reference
# arxiv_searcher/arxiv_mcp.py:494-594 (handler)
# Synchronous handler function for the 'find_related_papers' MCP tool. Extracts
# keywords from a given paper title, searches arXiv for matching papers based on
# keyword overlap similarity, filters by threshold, and returns related papers
# with similarity scores. Registered via @mcp.tool decorator.
@mcp.tool
def find_related_papers(
    paper_title: str,
    max_results: int = 10,
    similarity_threshold: float = 0.7,
    category: str | None = None,
) -> dict:
    """
    Find papers related to a given paper title using keyword similarity.

    :param paper_title: Title of the reference paper
    :param max_results: Maximum number of related papers to return
    :param similarity_threshold: Minimum similarity score (0.0 to 1.0)
    :param category: Optional category filter
    :return: Dict with the reference title, the keywords used, the threshold,
        the number of related papers and the papers themselves; on failure a
        dict with a single ``"error"`` key.
    """
    try:
        # A non-positive limit would make the final slice drop trailing
        # results (negative index) and request a nonsensical page size.
        if max_results < 1:
            return {"error": "max_results must be at least 1"}

        # Extract keywords from the title
        stop_words = {
            "a", "an", "and", "the", "of", "in", "for", "to", "with", "on",
            "is", "are", "was", "were", "it",
        }
        # dict.fromkeys de-duplicates while preserving first-seen order, so a
        # word repeated in the title is neither queried twice nor counted
        # twice in the similarity ratio.
        keywords = list(
            dict.fromkeys(
                word.lower()
                for word in re.findall(r"\b\w+\b", paper_title)
                if word.lower() not in stop_words and len(word) > 2
            )
        )

        if not keywords:
            return {"error": "No meaningful keywords found in title"}

        # Create search query from keywords: any keyword in title or abstract
        keyword_query = " OR ".join(
            f'(ti:"{kw}" OR abs:"{kw}")' for kw in keywords
        )
        query_parts = [f"({keyword_query})"]
        if category:
            query_parts.append(f"cat:{category}")
        final_query = " AND ".join(query_parts)

        # Search for related papers
        search = arxiv.Search(
            query=final_query,
            max_results=max_results * 2,  # Get more results to filter by similarity
            sort_by=arxiv.SortCriterion.Relevance,
            sort_order=arxiv.SortOrder.Descending,
        )

        results = []
        for r in search.results():
            # Simple similarity: fraction of keywords that occur (as
            # substrings) in the lower-cased title + abstract.
            paper_text = f"{r.title} {r.summary}".lower()
            matches = sum(1 for kw in keywords if kw in paper_text)
            similarity = matches / len(keywords)  # keywords is non-empty here

            if similarity < similarity_threshold:
                continue

            # Truncate long abstracts to keep the tool response compact.
            summary = r.summary
            if len(summary) > 500:
                summary = summary[:500] + "..."

            results.append(
                {
                    "title": r.title,
                    "authors": [a.name for a in r.authors],
                    "summary": summary,
                    "pdf_url": r.pdf_url,
                    "published_date": r.published.strftime("%Y-%m-%d"),
                    "similarity_score": round(similarity, 3),
                    "arxiv_id": r.entry_id.split("/")[-1],
                }
            )

        # Sort by similarity score (stable, so arXiv's relevance order breaks
        # ties) and limit results
        results.sort(key=lambda x: x["similarity_score"], reverse=True)
        results = results[:max_results]

        return {
            "reference_title": paper_title,
            "keywords_used": keywords,
            "similarity_threshold": similarity_threshold,
            "total_related_found": len(results),
            "related_papers": results,
        }

    except Exception as e:
        # Tool boundary: report any failure as an error payload rather than
        # raising.
        return {"error": f"Failed to find related papers: {str(e)}"}
# Asynchronous handler function for the 'find_related_papers' MCP tool in the
# remote version. Mirrors the synchronous version's keyword-overlap approach
# but is async for streamable HTTP transport. Registered via @mcp.tool
# decorator.
@mcp.tool
async def find_related_papers(
    paper_title: str,
    max_results: int = 10,
    similarity_threshold: float = 0.7,
    category: str | None = None,
) -> dict:
    """
    Find papers related to a given paper title using keyword similarity.

    :param paper_title: Title of the reference paper
    :param max_results: Maximum number of related papers to return
    :param similarity_threshold: Minimum similarity score (0.0 to 1.0)
    :param category: Optional category filter
    :return: Dict with the reference title, the keywords used, the threshold,
        the number of related papers and the papers themselves; on failure a
        dict with a single ``"error"`` key.
    """
    # Function-scope import so this handler does not depend on a module-level
    # asyncio import being present.
    import asyncio

    try:
        # A non-positive limit would make the final slice drop trailing
        # results (negative index) and request a nonsensical page size.
        if max_results < 1:
            return {"error": "max_results must be at least 1"}

        # Extract keywords from the title
        stop_words = {
            "a", "an", "and", "the", "of", "in", "for", "to", "with", "on",
            "is", "are", "was", "were", "it",
        }
        # dict.fromkeys de-duplicates while preserving first-seen order, so a
        # word repeated in the title is neither queried twice nor counted
        # twice in the similarity ratio.
        keywords = list(
            dict.fromkeys(
                word.lower()
                for word in re.findall(r"\b\w+\b", paper_title)
                if word.lower() not in stop_words and len(word) > 2
            )
        )

        if not keywords:
            return {"error": "No meaningful keywords found in title"}

        # Create search query from keywords: any keyword in title or abstract
        keyword_query = " OR ".join(
            f'(ti:"{kw}" OR abs:"{kw}")' for kw in keywords
        )
        query_parts = [f"({keyword_query})"]
        if category:
            query_parts.append(f"cat:{category}")
        final_query = " AND ".join(query_parts)

        # Search for related papers
        search = arxiv.Search(
            query=final_query,
            max_results=max_results * 2,  # Get more results to filter by similarity
            sort_by=arxiv.SortCriterion.Relevance,
            sort_order=arxiv.SortOrder.Descending,
        )

        def _collect_matches() -> list[dict]:
            """Fetch results and keep those meeting the similarity threshold."""
            matched = []
            for r in search.results():
                # Simple similarity: fraction of keywords that occur (as
                # substrings) in the lower-cased title + abstract.
                paper_text = f"{r.title} {r.summary}".lower()
                hits = sum(1 for kw in keywords if kw in paper_text)
                similarity = hits / len(keywords)  # keywords is non-empty

                if similarity < similarity_threshold:
                    continue

                # Truncate long abstracts to keep the tool response compact.
                summary = r.summary
                if len(summary) > 500:
                    summary = summary[:500] + "..."

                matched.append(
                    {
                        "title": r.title,
                        "authors": [a.name for a in r.authors],
                        "summary": summary,
                        "pdf_url": r.pdf_url,
                        "published_date": r.published.strftime("%Y-%m-%d"),
                        "similarity_score": round(similarity, 3),
                        "arxiv_id": r.entry_id.split("/")[-1],
                    }
                )
            return matched

        # Iterating the search performs synchronous network requests; run it
        # in a worker thread so the event loop stays free for other requests.
        results = await asyncio.to_thread(_collect_matches)

        # Sort by similarity score (stable, so arXiv's relevance order breaks
        # ties) and limit results
        results.sort(key=lambda x: x["similarity_score"], reverse=True)
        results = results[:max_results]

        return {
            "reference_title": paper_title,
            "keywords_used": keywords,
            "similarity_threshold": similarity_threshold,
            "total_related_found": len(results),
            "related_papers": results,
        }

    except Exception as e:
        # Tool boundary: report any failure (including errors raised in the
        # worker thread) as an error payload rather than raising.
        return {"error": f"Failed to find related papers: {str(e)}"}