"""
bioRxiv/medRxiv Adapter
Provides access to bioRxiv and medRxiv preprint servers
"""

import requests
from typing import List, Dict, Any
from datetime import datetime, timedelta

from .base_adapter import BaseAdapter


class BioRxivAdapter(BaseAdapter):
    """Adapter for bioRxiv and medRxiv preprint servers"""

    def __init__(self, server: str = "biorxiv"):
        """
        Initialize adapter

        Args:
            server: "biorxiv" or "medrxiv"
        """
        if server not in ["biorxiv", "medrxiv"]:
            raise ValueError("server must be 'biorxiv' or 'medrxiv'")
        self.server = server
        self.base_url = "https://api.biorxiv.org"

    def get_source_name(self) -> str:
        return self.server

    def search_by_keywords(self, keywords: str, num_results: int = 10) -> List[Dict[str, Any]]:
        """
        Search bioRxiv/medRxiv by keywords

        Note: the bioRxiv API doesn't support keyword search directly,
        so we fetch recent papers and filter them by keywords.

        Args:
            keywords: Search query string
            num_results: Number of results to return

        Returns:
            List of standardized paper dictionaries
        """
        try:
            # Fetch papers from the last year and filter
            end_date = datetime.now()
            start_date = end_date - timedelta(days=365)
            date_range = f"{start_date.strftime('%Y-%m-%d')}/{end_date.strftime('%Y-%m-%d')}"

            all_results = []
            cursor = 0
            keywords_lower = keywords.lower()

            # Fetch in batches until we have enough results
            while len(all_results) < num_results and cursor < 10000:
                url = f"{self.base_url}/details/{self.server}/{date_range}/{cursor}"
                response = requests.get(url, timeout=30)
                if response.status_code != 200:
                    break

                data = response.json()
                collection = data.get("collection", [])
                if not collection:
                    break

                # Filter by keywords in title or abstract
                for item in collection:
                    title = item.get("title", "").lower()
                    abstract = item.get("abstract", "").lower()
                    if keywords_lower in title or keywords_lower in abstract:
                        all_results.append(self._format_biorxiv_result(item))
                        if len(all_results) >= num_results:
                            break

                cursor += len(collection)

            return all_results[:num_results]
        except Exception as e:
            print(f"Error searching {self.server}: {e}")
            return []

    def search_advanced(self, **kwargs) -> List[Dict[str, Any]]:
        """
        Advanced search in bioRxiv/medRxiv

        Args:
            title: Search in title
            author: Author name
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            num_results: Number of results

        Returns:
            List of standardized paper dictionaries
        """
        try:
            start_date = kwargs.get('start_date')
            end_date = kwargs.get('end_date')
            title_query = kwargs.get('title', '').lower()
            author_query = kwargs.get('author', '').lower()
            num_results = kwargs.get('num_results', 10)

            # Default to the last year if no dates provided
            if not end_date:
                end_date = datetime.now().strftime('%Y-%m-%d')
            if not start_date:
                start_obj = datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=365)
                start_date = start_obj.strftime('%Y-%m-%d')
            date_range = f"{start_date}/{end_date}"

            all_results = []
            cursor = 0

            while len(all_results) < num_results and cursor < 10000:
                url = f"{self.base_url}/details/{self.server}/{date_range}/{cursor}"
                response = requests.get(url, timeout=30)
                if response.status_code != 200:
                    break

                data = response.json()
                collection = data.get("collection", [])
                if not collection:
                    break

                for item in collection:
                    # Filter by title and author
                    title_match = not title_query or title_query in item.get("title", "").lower()
                    author_match = not author_query or author_query in item.get("authors", "").lower()
                    if title_match and author_match:
                        all_results.append(self._format_biorxiv_result(item))
                        if len(all_results) >= num_results:
                            break

                cursor += len(collection)

            return all_results[:num_results]
        except Exception as e:
            print(f"Error in advanced {self.server} search: {e}")
            return []

    def get_metadata(self, identifier: str) -> Dict[str, Any]:
        """
        Get metadata for a bioRxiv/medRxiv article by DOI

        Args:
            identifier: DOI of the article (e.g., "10.1101/2024.01.001")

        Returns:
            Standardized metadata dictionary
        """
        try:
            # The details endpoint expects the full DOI, so add the
            # "10.1101/" prefix if only the suffix was provided
            doi = identifier if identifier.startswith("10.1101/") else f"10.1101/{identifier}"
            url = f"{self.base_url}/details/{self.server}/{doi}"
            response = requests.get(url, timeout=30)
            if response.status_code != 200:
                return {"error": f"Could not retrieve metadata for DOI: {identifier}"}

            data = response.json()
            collection = data.get("collection", [])
            if collection:
                return self._format_biorxiv_result(collection[0])
            return {"error": f"No metadata found for DOI: {identifier}"}
        except Exception as e:
            return {"error": f"Error fetching metadata: {str(e)}"}

    def download_pdf(self, identifier: str) -> str:
        """
        Download PDF for a bioRxiv/medRxiv article

        Args:
            identifier: DOI of the article

        Returns:
            Status message
        """
        try:
            # bioRxiv/medRxiv DOI format: 10.1101/YYYY.MM.DD.XXXXXX
            # The content URL uses the full DOI, so add the prefix if needed
            doi = identifier if identifier.startswith("10.1101/") else f"10.1101/{identifier}"

            # PDF URL format (assumes version 1 of the preprint)
            pdf_url = f"https://www.{self.server}.org/content/{doi}v1.full.pdf"
            response = requests.get(pdf_url, timeout=30)
            if response.status_code != 200:
                return f"Error: Unable to download PDF (status code: {response.status_code})"

            # Save PDF
            filename = f"{self.server}_{doi.replace('/', '_')}.pdf"
            with open(filename, 'wb') as f:
                f.write(response.content)
            return f"PDF downloaded successfully as {filename}"
        except Exception as e:
            return f"Error downloading PDF: {str(e)}"

    def _format_biorxiv_result(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert bioRxiv/medRxiv format to standardized format

        Args:
            item: Result from bioRxiv API

        Returns:
            Standardized result dictionary
        """
        doi = item.get("doi", "")
        # The API reports the preprint version; default to "1" if absent
        version = item.get("version", "1")
        return {
            "id": doi,
            "title": item.get("title", ""),
            "authors": item.get("authors", ""),
            "abstract": item.get("abstract", ""),
            "publication_date": item.get("date", ""),
            "journal": f"{self.server} (preprint)",
            "url": f"https://www.{self.server}.org/content/{doi}v{version}",
            "pdf_url": f"https://www.{self.server}.org/content/{doi}v{version}.full.pdf",
            "source": self.server
        }
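

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the adapter API):
    # exercises keyword search and metadata lookup against the live bioRxiv
    # API. The query string is a placeholder; because of the relative import
    # above, run this module with `python -m <package>.<module>` rather than
    # as a standalone script.
    adapter = BioRxivAdapter(server="biorxiv")

    # Keyword search over roughly the last year of preprints
    papers = adapter.search_by_keywords("CRISPR", num_results=3)
    for paper in papers:
        print(paper["title"], "-", paper["url"])

    # Metadata lookup by DOI (accepts the full "10.1101/..." DOI or its suffix)
    if papers:
        metadata = adapter.get_metadata(papers[0]["id"])
        print(metadata.get("publication_date", ""))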