Academic MCP Server

academic_server.py•11.3 kB

""" Academic MCP Server Unified interface for multiple academic databases (PubMed, bioRxiv, arXiv, Semantic Scholar) """ from typing import Any, List, Dict, Optional, Union import asyncio import logging from mcp.server.fastmcp import FastMCP # Import adapters from adapters.pubmed_adapter import PubMedAdapter from adapters.biorxiv_adapter import BioRxivAdapter from adapters.arxiv_adapter import ArXivAdapter from adapters.semantic_scholar_adapter import SemanticScholarAdapter from adapters.scihub_adapter import SciHubAdapter # Import utilities from utils.helpers import merge_results_from_sources # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # Initialize FastMCP server mcp = FastMCP("academic") # Initialize all adapters adapters = { "pubmed": PubMedAdapter(), "biorxiv": BioRxivAdapter(server="biorxiv"), "medrxiv": BioRxivAdapter(server="medrxiv"), "arxiv": ArXivAdapter(), "semantic_scholar": SemanticScholarAdapter(), # Add API key if you have one "scihub": SciHubAdapter() # Sci-Hub for comprehensive paper access } AVAILABLE_SOURCES = list(adapters.keys()) @mcp.tool() async def search_papers( keywords: str, source: str = "all", num_results: int = 10 ) -> List[Dict[str, Any]]: """ Search for academic papers across multiple databases. Args: keywords: Search query string (e.g., "UCAR-T", "machine learning") source: Data source to search. Options: "all", "pubmed", "biorxiv", "medrxiv", "arxiv", "semantic_scholar", "scihub" num_results: Number of results to return per source (default: 10) Returns: List of dictionaries containing paper information with standardized fields: - id: Unique identifier (PMID, DOI, arXiv ID, etc.) - title: Paper title - authors: Author names - abstract: Paper abstract - publication_date: Publication date - journal: Journal or venue name - url: Link to paper - pdf_url: Link to PDF if available - source: Database source """ logging.info(f"Searching for papers with keywords: '{keywords}', source: {source}, num_results: {num_results}") try: if source == "all": # Search all sources all_results = {} for source_name, adapter in adapters.items(): try: results = await asyncio.to_thread( adapter.search_by_keywords, keywords, num_results ) if results: all_results[source_name] = results logging.info(f"Found {len(results)} results from {source_name}") except Exception as e: logging.error(f"Error searching {source_name}: {e}") # Merge results from all sources merged = merge_results_from_sources(all_results) logging.info(f"Total unique results: {len(merged)}") return merged elif source in adapters: # Search specific source adapter = adapters[source] results = await asyncio.to_thread( adapter.search_by_keywords, keywords, num_results ) logging.info(f"Found {len(results)} results from {source}") return results else: error_msg = f"Invalid source '{source}'. Available sources: {', '.join(AVAILABLE_SOURCES)}, 'all'" logging.error(error_msg) return [{"error": error_msg}] except Exception as e: error_msg = f"An error occurred while searching: {str(e)}" logging.error(error_msg) return [{"error": error_msg}] @mcp.tool() async def search_papers_advanced( title: Optional[str] = None, author: Optional[str] = None, journal: Optional[str] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, term: Optional[str] = None, source: str = "all", num_results: int = 10 ) -> List[Dict[str, Any]]: """ Perform advanced search for academic papers with multiple parameters. Args: title: Search in paper titles author: Author name journal: Journal name (primarily for PubMed) start_date: Start date (format varies by source: PubMed uses YYYY/MM/DD, others use YYYY-MM-DD) end_date: End date term: General search term source: Data source. Options: "all", "pubmed", "biorxiv", "medrxiv", "arxiv", "semantic_scholar", "scihub" num_results: Number of results to return per source Returns: List of dictionaries containing paper information """ logging.info(f"Advanced search - source: {source}, title: {title}, author: {author}, journal: {journal}") try: search_params = { "title": title, "author": author, "journal": journal, "start_date": start_date, "end_date": end_date, "term": term, "num_results": num_results } # Remove None values search_params = {k: v for k, v in search_params.items() if v is not None} if source == "all": all_results = {} for source_name, adapter in adapters.items(): try: results = await asyncio.to_thread( adapter.search_advanced, **search_params ) if results: all_results[source_name] = results logging.info(f"Found {len(results)} results from {source_name}") except Exception as e: logging.error(f"Error in advanced search for {source_name}: {e}") merged = merge_results_from_sources(all_results) return merged elif source in adapters: adapter = adapters[source] results = await asyncio.to_thread( adapter.search_advanced, **search_params ) return results else: return [{"error": f"Invalid source '{source}'. Available: {', '.join(AVAILABLE_SOURCES)}, 'all'"}] except Exception as e: return [{"error": f"Error in advanced search: {str(e)}"}] @mcp.tool() async def get_paper_metadata( identifier: str, source: str ) -> Dict[str, Any]: """ Get detailed metadata for a specific paper. Args: identifier: Paper identifier (PMID for PubMed, DOI for bioRxiv, arXiv ID for arXiv, etc.) source: Data source. Options: "pubmed", "biorxiv", "medrxiv", "arxiv", "semantic_scholar" Returns: Dictionary containing detailed paper metadata Examples: - PubMed: get_paper_metadata("40883768", "pubmed") - bioRxiv: get_paper_metadata("10.1101/2024.01.001", "biorxiv") - arXiv: get_paper_metadata("2301.00001", "arxiv") """ logging.info(f"Fetching metadata for {source}:{identifier}") try: if source not in adapters: return {"error": f"Invalid source '{source}'. Available: {', '.join(AVAILABLE_SOURCES)}"} adapter = adapters[source] metadata = await asyncio.to_thread(adapter.get_metadata, identifier) if metadata: logging.info(f"Successfully retrieved metadata for {identifier}") return metadata except Exception as e: return {"error": f"Error fetching metadata: {str(e)}"} @mcp.tool() async def download_paper_pdf( identifier: str, source: str ) -> str: """ Download PDF for a specific paper. Args: identifier: Paper identifier (PMID, DOI, arXiv ID, etc.) source: Data source. Options: "pubmed", "biorxiv", "medrxiv", "arxiv", "semantic_scholar" Returns: Status message indicating success or failure Note: - PubMed: Only open access articles from PMC can be downloaded - bioRxiv/medRxiv: All articles are open access - arXiv: All articles are open access - Semantic Scholar: Availability depends on the paper """ logging.info(f"Attempting to download PDF for {source}:{identifier}") try: if source not in adapters: return f"Error: Invalid source '{source}'. Available: {', '.join(AVAILABLE_SOURCES)}" adapter = adapters[source] result = await asyncio.to_thread(adapter.download_pdf, identifier) return result except Exception as e: return f"Error downloading PDF: {str(e)}" @mcp.tool() async def list_available_sources() -> List[str]: """ List all available data sources. Returns: List of available source names """ return AVAILABLE_SOURCES @mcp.prompt() async def deep_paper_analysis( identifier: str, source: str ) -> Dict[str, str]: """ Perform comprehensive analysis of a paper (currently only supports PubMed). Args: identifier: Paper identifier source: Data source (currently only "pubmed" is fully supported) Returns: Dictionary containing analysis prompt or error message """ logging.info(f"Performing deep analysis for {source}:{identifier}") try: if source == "pubmed": adapter = adapters["pubmed"] analysis = await asyncio.to_thread(adapter.deep_analysis, identifier) return {"analysis_prompt": analysis} else: # For other sources, generate basic analysis from metadata metadata = await get_paper_metadata(identifier, source) if "error" in metadata: return {"error": metadata["error"]} analysis_prompt = f""" As an expert in scientific paper analysis, please provide a comprehensive analysis of the following paper: Title: {metadata.get('title', 'N/A')} Authors: {metadata.get('authors', 'N/A')} Journal/Venue: {metadata.get('journal', 'N/A')} Publication Date: {metadata.get('publication_date', 'N/A')} Abstract: {metadata.get('abstract', 'N/A')} Please address the following aspects in your analysis: 1. Research Background and Significance 2. Main Research Questions or Hypotheses 3. Methodology Overview 4. Key Findings and Results 5. Conclusions and Implications 6. Limitations of the Study 7. Future Research Directions 8. Relationship to Other Studies in the Field 9. Overall Evaluation of the Research Ensure your analysis is thorough, objective, and based on the information provided. """ return {"analysis_prompt": analysis_prompt} except Exception as e: return {"error": f"Error performing analysis: {str(e)}"} if __name__ == "__main__": logging.info("Starting Academic MCP Server") logging.info(f"Available sources: {', '.join(AVAILABLE_SOURCES)}") # Initialize and run the server mcp.run(transport='stdio')

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nanyang12138/Academic-MCP-Server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server