search_medrxiv
Search academic papers from medRxiv by medical category to find recent research in specific fields like infectious diseases or oncology.
Instructions
Search academic papers from medRxiv.
Note: medRxiv API filters by category name within the last 30 days, not full-text keyword search. Use a category keyword such as 'infectious_diseases', 'cardiovascular_medicine', 'oncology', etc.
Args: query: Category name to filter by (e.g., 'infectious_diseases', 'oncology'). max_results: Maximum number of papers to return (default: 10). Returns: List of paper metadata in dictionary format.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
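| query | Yes | Category name to filter by (e.g., 'infectious_diseases', 'oncology'). | |
| max_results | No | Maximum number of papers to return. | 10 |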
Implementation Reference
- Implementation of the MedRxivSearcher class which performs the actual API calls to search for papers on medRxiv.
class MedRxivSearcher(PaperSource):
    """Searcher for medRxiv papers.

    Pages through the medRxiv "details" endpoint for a date range and an
    optional category filter, converting each returned entry into a Paper.
    """

    BASE_URL = "https://api.biorxiv.org/details/medrxiv"
    # Step used to advance the cursor. A page shorter than this is treated as
    # the last page of the result set.
    PAGE_SIZE = 100

    def __init__(self):
        self.session = requests.Session()
        # Explicitly configure no proxy for either scheme on this session.
        self.session.proxies = {'http': None, 'https': None}
        self.timeout = 30      # per-request timeout passed to session.get
        self.max_retries = 3   # attempts per page before giving up

    def search(self, query: str, max_results: int = 10, days: int = 30) -> List[Paper]:
        """
        Search for papers on medRxiv by category within the last N days.

        Pages are requested one after another until enough papers have been
        collected, a short (final) page is received, or a page cannot be
        fetched after ``self.max_retries`` attempts.

        Args:
            query: Category name to search for (e.g., "cardiovascular medicine").
                It is lowercased and spaces are replaced with underscores.
            max_results: Maximum number of papers to return.
            days: Number of days to look back for papers.

        Returns:
            List of Paper objects matching the category within the specified
            date range, at most ``max_results`` long. Entries that fail to
            parse are skipped; a failed page ends the search and whatever was
            collected so far is returned.
        """
        # Take "now" once so both ends of the range come from the same instant.
        now = datetime.now()
        end_date = now.strftime('%Y-%m-%d')
        start_date = (now - timedelta(days=days)).strftime('%Y-%m-%d')

        # Format category: lowercase and replace spaces with underscores
        category = query.lower().replace(' ', '_')

        papers = []
        cursor = 0
        while len(papers) < max_results:
            collection = self._fetch_page(start_date, end_date, cursor, category)
            if collection is None:
                break  # Every attempt for this page failed; return what we have

            for item in collection:
                paper = self._parse_item(item)
                if paper is not None:
                    papers.append(paper)

            if len(collection) < self.PAGE_SIZE:
                break  # No more results
            # Full page: advance so the next iteration requests the following page.
            cursor += self.PAGE_SIZE

        return papers[:max_results]

    def _fetch_page(self, start_date: str, end_date: str, cursor: int, category: str):
        """Fetch one page of raw entries, retrying on request errors.

        Returns:
            The list stored under the response's 'collection' key (an empty
            list when it is missing or empty), or None when all
            ``self.max_retries`` attempts raised a RequestException.
        """
        url = f"{self.BASE_URL}/{start_date}/{end_date}/{cursor}"
        if category:
            url += f"?category={category}"

        for attempt in range(1, self.max_retries + 1):
            try:
                response = self.session.get(url, timeout=self.timeout)
                response.raise_for_status()
                return response.json().get('collection') or []
            except requests.exceptions.RequestException as e:
                if attempt == self.max_retries:
                    print(f"Failed to connect to medRxiv API after {self.max_retries} attempts: {e}")
                else:
                    print(f"Attempt {attempt} failed, retrying...")
        return None

    @staticmethod
    def _parse_item(item):
        """Build a Paper from one raw API entry; return None if the entry is malformed."""
        try:
            date = datetime.strptime(item['date'], '%Y-%m-%d')
            doi = item['doi']
            version = item.get('version', '1')
            return Paper(
                paper_id=doi,
                title=item['title'],
                authors=item['authors'].split('; '),
                abstract=item['abstract'],
                url=f"https://www.medrxiv.org/content/{doi}v{version}",
                pdf_url=f"https://www.medrxiv.org/content/{doi}v{version}.full.pdf",
                published_date=date,
                updated_date=date,
                source="medrxiv",
                categories=[item['category']],
                keywords=[],
                doi=doi
            )
        except Exception as e:
            # Deliberately broad: one bad entry must not abort the whole search.
            print(f"Error parsing medRxiv entry: {e}")
            return None

# - paper_search_mcp/server.py:404-418 (handler) MCP tool handler `search_medrxiv` that wraps the MedRxivSearcher service.
async def search_medrxiv(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers from medRxiv.

    Note: medRxiv API filters by category name within the last 30 days,
    not full-text keyword search. Use a category keyword such as
    'infectious_diseases', 'cardiovascular_medicine', 'oncology', etc.

    Args:
        query: Category name to filter by (e.g., 'infectious_diseases', 'oncology').
        max_results: Maximum number of papers to return (default: 10).

    Returns:
        List of paper metadata in dictionary format; an empty list when the
        underlying search yields nothing.
    """
    found = await async_search(medrxiv_searcher, query, max_results)
    # Normalise any falsy result (None, empty list) to a fresh empty list.
    if not found:
        return []
    return found