Skip to main content
Glama

Paper Search MCP

by openags
server.py (15.3 kB)
# paper_search_mcp/server.py
"""MCP server exposing academic paper search, download and read tools.

Each tool is a thin asynchronous wrapper around a synchronous searcher object
from ``academic_platforms``. The server is started over the stdio transport, so
nothing in this module may write to stdout: diagnostics go through ``logging``.
"""
import logging
from typing import List, Dict, Optional

import httpx  # noqa: F401 - no longer used here; kept so the module namespace is unchanged
from mcp.server.fastmcp import FastMCP

from .academic_platforms.arxiv import ArxivSearcher
from .academic_platforms.pubmed import PubMedSearcher
from .academic_platforms.biorxiv import BioRxivSearcher
from .academic_platforms.medrxiv import MedRxivSearcher
from .academic_platforms.google_scholar import GoogleScholarSearcher
from .academic_platforms.iacr import IACRSearcher
from .academic_platforms.semantic import SemanticSearcher
from .academic_platforms.crossref import CrossRefSearcher

# from .academic_platforms.hub import SciHubSearcher
from .paper import Paper

logger = logging.getLogger(__name__)

# Initialize MCP server
mcp = FastMCP("paper_search_server")

# Instances of searchers
arxiv_searcher = ArxivSearcher()
pubmed_searcher = PubMedSearcher()
biorxiv_searcher = BioRxivSearcher()
medrxiv_searcher = MedRxivSearcher()
google_scholar_searcher = GoogleScholarSearcher()
iacr_searcher = IACRSearcher()
semantic_searcher = SemanticSearcher()
crossref_searcher = CrossRefSearcher()
# scihub_searcher = SciHubSearcher()


# Asynchronous helper to adapt synchronous searchers
async def async_search(searcher, query: str, max_results: int, **kwargs) -> List[Dict]:
    """Run ``searcher.search`` and convert the resulting papers to dictionaries.

    Args:
        searcher: Object exposing ``search(query, max_results=..., **kwargs)``
            that returns an iterable of objects with a ``to_dict()`` method.
        query: Search query string.
        max_results: Maximum number of papers to return.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``searcher.search`` (e.g. ``year`` for Semantic Scholar or
            ``filter``/``sort``/``order`` for CrossRef).

    Returns:
        List of paper metadata dictionaries (empty if the searcher returns
        nothing).
    """
    # The searcher call is synchronous. Every keyword argument is forwarded:
    # previously only ``year`` was, so CrossRef filter/sort/order options were
    # silently discarded.
    papers = searcher.search(query, max_results=max_results, **kwargs)
    return [paper.to_dict() for paper in papers or []]


def _read_paper_or_empty(searcher, paper_id: str, save_path: str) -> str:
    """Return ``searcher.read_paper(...)``, or ``""`` if it raises.

    Failures are logged rather than printed: the server talks to its client
    over stdout, so a ``print`` here would inject text into the protocol stream.
    """
    try:
        return searcher.read_paper(paper_id, save_path)
    except Exception as e:  # best-effort tool boundary: report and degrade
        logger.error("Error reading paper %s: %s", paper_id, e)
        return ""


# Tool definitions
@mcp.tool()
async def search_arxiv(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers from arXiv.

    Args:
        query: Search query string (e.g., 'machine learning').
        max_results: Maximum number of papers to return (default: 10).
    Returns:
        List of paper metadata in dictionary format.
    """
    papers = await async_search(arxiv_searcher, query, max_results)
    return papers if papers else []


@mcp.tool()
async def search_pubmed(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers from PubMed.

    Args:
        query: Search query string (e.g., 'machine learning').
        max_results: Maximum number of papers to return (default: 10).
    Returns:
        List of paper metadata in dictionary format.
    """
    papers = await async_search(pubmed_searcher, query, max_results)
    return papers if papers else []


@mcp.tool()
async def search_biorxiv(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers from bioRxiv.

    Args:
        query: Search query string (e.g., 'machine learning').
        max_results: Maximum number of papers to return (default: 10).
    Returns:
        List of paper metadata in dictionary format.
    """
    papers = await async_search(biorxiv_searcher, query, max_results)
    return papers if papers else []


@mcp.tool()
async def search_medrxiv(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers from medRxiv.

    Args:
        query: Search query string (e.g., 'machine learning').
        max_results: Maximum number of papers to return (default: 10).
    Returns:
        List of paper metadata in dictionary format.
    """
    papers = await async_search(medrxiv_searcher, query, max_results)
    return papers if papers else []


@mcp.tool()
async def search_google_scholar(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers from Google Scholar.

    Args:
        query: Search query string (e.g., 'machine learning').
        max_results: Maximum number of papers to return (default: 10).
    Returns:
        List of paper metadata in dictionary format.
    """
    papers = await async_search(google_scholar_searcher, query, max_results)
    return papers if papers else []


@mcp.tool()
async def search_iacr(
    query: str, max_results: int = 10, fetch_details: bool = True
) -> List[Dict]:
    """Search academic papers from IACR ePrint Archive.

    Args:
        query: Search query string (e.g., 'cryptography', 'secret sharing').
        max_results: Maximum number of papers to return (default: 10).
        fetch_details: Whether to fetch detailed information for each paper (default: True).
    Returns:
        List of paper metadata in dictionary format.
    """
    # Called directly rather than through async_search because of the extra
    # positional ``fetch_details`` argument.
    papers = iacr_searcher.search(query, max_results, fetch_details)
    return [paper.to_dict() for paper in papers] if papers else []


@mcp.tool()
async def download_arxiv(paper_id: str, save_path: str = "./downloads") -> str:
    """Download PDF of an arXiv paper.

    Args:
        paper_id: arXiv paper ID (e.g., '2106.12345').
        save_path: Directory to save the PDF (default: './downloads').
    Returns:
        Path to the downloaded PDF file.
    """
    return arxiv_searcher.download_pdf(paper_id, save_path)


@mcp.tool()
async def download_pubmed(paper_id: str, save_path: str = "./downloads") -> str:
    """Attempt to download PDF of a PubMed paper.

    Args:
        paper_id: PubMed ID (PMID).
        save_path: Directory to save the PDF (default: './downloads').
    Returns:
        str: Message indicating that direct PDF download is not supported.
    """
    try:
        return pubmed_searcher.download_pdf(paper_id, save_path)
    except NotImplementedError as e:
        # Surface the "not supported" explanation to the client as the result.
        return str(e)


@mcp.tool()
async def download_biorxiv(paper_id: str, save_path: str = "./downloads") -> str:
    """Download PDF of a bioRxiv paper.

    Args:
        paper_id: bioRxiv DOI.
        save_path: Directory to save the PDF (default: './downloads').
    Returns:
        Path to the downloaded PDF file.
    """
    return biorxiv_searcher.download_pdf(paper_id, save_path)


@mcp.tool()
async def download_medrxiv(paper_id: str, save_path: str = "./downloads") -> str:
    """Download PDF of a medRxiv paper.

    Args:
        paper_id: medRxiv DOI.
        save_path: Directory to save the PDF (default: './downloads').
    Returns:
        Path to the downloaded PDF file.
    """
    return medrxiv_searcher.download_pdf(paper_id, save_path)


@mcp.tool()
async def download_iacr(paper_id: str, save_path: str = "./downloads") -> str:
    """Download PDF of an IACR ePrint paper.

    Args:
        paper_id: IACR paper ID (e.g., '2009/101').
        save_path: Directory to save the PDF (default: './downloads').
    Returns:
        Path to the downloaded PDF file.
    """
    return iacr_searcher.download_pdf(paper_id, save_path)


@mcp.tool()
async def read_arxiv_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Read and extract text content from an arXiv paper PDF.

    Args:
        paper_id: arXiv paper ID (e.g., '2106.12345').
        save_path: Directory where the PDF is/will be saved (default: './downloads').
    Returns:
        str: The extracted text content of the paper.
    """
    return _read_paper_or_empty(arxiv_searcher, paper_id, save_path)


@mcp.tool()
async def read_pubmed_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Read and extract text content from a PubMed paper.

    Args:
        paper_id: PubMed ID (PMID).
        save_path: Directory where the PDF would be saved (unused).
    Returns:
        str: Message indicating that direct paper reading is not supported.
    """
    return pubmed_searcher.read_paper(paper_id, save_path)


@mcp.tool()
async def read_biorxiv_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Read and extract text content from a bioRxiv paper PDF.

    Args:
        paper_id: bioRxiv DOI.
        save_path: Directory where the PDF is/will be saved (default: './downloads').
    Returns:
        str: The extracted text content of the paper.
    """
    return _read_paper_or_empty(biorxiv_searcher, paper_id, save_path)


@mcp.tool()
async def read_medrxiv_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Read and extract text content from a medRxiv paper PDF.

    Args:
        paper_id: medRxiv DOI.
        save_path: Directory where the PDF is/will be saved (default: './downloads').
    Returns:
        str: The extracted text content of the paper.
    """
    return _read_paper_or_empty(medrxiv_searcher, paper_id, save_path)


@mcp.tool()
async def read_iacr_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Read and extract text content from an IACR ePrint paper PDF.

    Args:
        paper_id: IACR paper ID (e.g., '2009/101').
        save_path: Directory where the PDF is/will be saved (default: './downloads').
    Returns:
        str: The extracted text content of the paper.
    """
    return _read_paper_or_empty(iacr_searcher, paper_id, save_path)


@mcp.tool()
async def search_semantic(query: str, year: Optional[str] = None, max_results: int = 10) -> List[Dict]:
    """Search academic papers from Semantic Scholar.

    Args:
        query: Search query string (e.g., 'machine learning').
        year: Optional year filter (e.g., '2019', '2016-2020', '2010-', '-2015').
        max_results: Maximum number of papers to return (default: 10).
    Returns:
        List of paper metadata in dictionary format.
    """
    # Only pass ``year`` when the caller supplied one, so the searcher's own
    # default applies otherwise.
    kwargs = {}
    if year is not None:
        kwargs['year'] = year
    papers = await async_search(semantic_searcher, query, max_results, **kwargs)
    return papers if papers else []


@mcp.tool()
async def download_semantic(paper_id: str, save_path: str = "./downloads") -> str:
    """Download PDF of a Semantic Scholar paper.

    Args:
        paper_id: Semantic Scholar paper ID, Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
        save_path: Directory to save the PDF (default: './downloads').
    Returns:
        Path to the downloaded PDF file.
    """
    return semantic_searcher.download_pdf(paper_id, save_path)


@mcp.tool()
async def read_semantic_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Read and extract text content from a Semantic Scholar paper.

    Args:
        paper_id: Semantic Scholar paper ID, Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
        save_path: Directory where the PDF is/will be saved (default: './downloads').
    Returns:
        str: The extracted text content of the paper.
    """
    return _read_paper_or_empty(semantic_searcher, paper_id, save_path)


@mcp.tool()
async def search_crossref(query: str, max_results: int = 10, **kwargs) -> List[Dict]:
    """Search academic papers from CrossRef database.

    CrossRef is a scholarly infrastructure organization that provides
    persistent identifiers (DOIs) for scholarly content and metadata.
    It's one of the largest citation databases covering millions of
    academic papers, journals, books, and other scholarly content.

    Args:
        query: Search query string (e.g., 'machine learning', 'climate change').
        max_results: Maximum number of papers to return (default: 10, max: 1000).
        **kwargs: Additional search parameters:
            - filter: CrossRef filter string (e.g., 'has-full-text:true,from-pub-date:2020')
            - sort: Sort field ('relevance', 'published', 'updated', 'deposited', etc.)
            - order: Sort order ('asc' or 'desc')

    Returns:
        List of paper metadata in dictionary format.

    Examples:
        # Basic search
        search_crossref("deep learning", 20)

        # Search with filters
        search_crossref("climate change", 10, filter="from-pub-date:2020,has-full-text:true")

        # Search sorted by publication date
        search_crossref("neural networks", 15, sort="published", order="desc")
    """
    papers = await async_search(crossref_searcher, query, max_results, **kwargs)
    return papers if papers else []


@mcp.tool()
async def get_crossref_paper_by_doi(doi: str) -> Dict:
    """Get a specific paper from CrossRef by its DOI.

    Args:
        doi: Digital Object Identifier (e.g., '10.1038/nature12373').

    Returns:
        Paper metadata in dictionary format, or empty dict if not found.

    Example:
        get_crossref_paper_by_doi("10.1038/nature12373")
    """
    paper = crossref_searcher.get_paper_by_doi(doi)
    return paper.to_dict() if paper else {}


@mcp.tool()
async def download_crossref(paper_id: str, save_path: str = "./downloads") -> str:
    """Attempt to download PDF of a CrossRef paper.

    Args:
        paper_id: CrossRef DOI (e.g., '10.1038/nature12373').
        save_path: Directory to save the PDF (default: './downloads').

    Returns:
        str: Message indicating that direct PDF download is not supported.

    Note:
        CrossRef is a citation database and doesn't provide direct PDF downloads.
        Use the DOI to access the paper through the publisher's website.
    """
    try:
        return crossref_searcher.download_pdf(paper_id, save_path)
    except NotImplementedError as e:
        # Surface the "not supported" explanation to the client as the result.
        return str(e)


@mcp.tool()
async def read_crossref_paper(paper_id: str, save_path: str = "./downloads") -> str:
    """Attempt to read and extract text content from a CrossRef paper.

    Args:
        paper_id: CrossRef DOI (e.g., '10.1038/nature12373').
        save_path: Directory where the PDF is/will be saved (default: './downloads').

    Returns:
        str: Message indicating that direct paper reading is not supported.

    Note:
        CrossRef is a citation database and doesn't provide direct paper content.
        Use the DOI to access the paper through the publisher's website.
    """
    return crossref_searcher.read_paper(paper_id, save_path)


if __name__ == "__main__":
    mcp.run(transport="stdio")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/openags/paper-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.