"""Download tools for single and batch paper downloads."""
import asyncio
import os
import time
from ..formatters import format_batch_results, format_download_result
from ..models import DownloadResult
from ..scihub_core.client import SciHubClient
from ..scihub_core.models import DownloadResult as CoreDownloadResult
from ..server import DEFAULT_OUTPUT_DIR, EMAIL, mcp


def _format_core_result(core_result: CoreDownloadResult) -> DownloadResult:
    """Convert scihub-core download results into MCP-friendly results."""
    doi = core_result.normalized_identifier or core_result.identifier
    file_path = os.path.abspath(core_result.file_path) if core_result.file_path else None
    file_size = core_result.file_size
    if file_path and file_size is None and os.path.exists(file_path):
        file_size = os.path.getsize(file_path)
    source = core_result.source
    if not source and isinstance(core_result.metadata, dict):
        source = core_result.metadata.get("source")
    return DownloadResult(
        doi=doi,
        success=core_result.success,
        file_path=file_path,
        file_size=file_size,
        title=core_result.title,
        year=core_result.year,
        source=source,
        download_time=core_result.download_time,
        error=core_result.error,
    )
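
# A minimal usage sketch for _format_core_result, assuming CoreDownloadResult
# can be constructed with keyword arguments matching the attributes read above
# (the exact constructor signature is an assumption, not taken from scihub-core):
#
#     core = CoreDownloadResult(
#         identifier="10.1038/nature12373",
#         success=True,
#         file_path="downloads/nature12373.pdf",
#         metadata={"source": "unpaywall"},
#     )
#     result = _format_core_result(core)
#     # result.source == "unpaywall": the conversion falls back to
#     # metadata["source"] when core_result.source is unset, and file_path
#     # is made absolute.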


@mcp.tool()
async def paper_download(identifier: str, output_dir: str | None = "./downloads") -> str:
    """
    Download an academic paper by DOI, arXiv ID, or URL.

    Tries open-access sources first (Unpaywall, arXiv, CORE) and falls back
    to Sci-Hub only as a last resort.

    Args:
        identifier: DOI, arXiv ID, or URL
        output_dir: Save directory (default: './downloads')

    Returns:
        Markdown with file path, metadata, and source, or an error message

    Examples:
        paper_download("10.1038/nature12373")                # DOI
        paper_download("2301.00001")                         # arXiv ID
        paper_download("https://arxiv.org/abs/2301.00001")   # URL
    """

    def _download() -> DownloadResult:
        """Synchronous wrapper for the download operation."""
        try:
            # Initialize the client with the configured email and output directory
            client = SciHubClient(email=EMAIL, output_dir=output_dir or DEFAULT_OUTPUT_DIR)  # type: ignore
            # Download the paper and convert the core result
            core_result = client.download_paper(identifier)
            return _format_core_result(core_result)
        except Exception as e:
            return DownloadResult(doi=identifier, success=False, error=str(e))

    # Run the blocking download in a worker thread so the event loop stays free
    result = await asyncio.to_thread(_download)
    return format_download_result(result)
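
# A quick manual-test sketch for paper_download. This assumes the @mcp.tool()
# decorator returns the original coroutine function, which varies between
# FastMCP versions, so treat the direct call as an assumption:
#
#     import asyncio
#
#     markdown = asyncio.run(paper_download("10.1038/nature12373"))
#     print(markdown)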


@mcp.tool()
async def paper_batch_download(
    identifiers: list[str], output_dir: str | None = "./downloads"
) -> str:
    """
    Download multiple papers sequentially (1-50 per batch, 2s delay between downloads).

    Tries open-access sources first (Unpaywall, arXiv, CORE) and falls back
    to Sci-Hub only as a last resort.

    Args:
        identifiers: List of DOIs, arXiv IDs, or URLs
        output_dir: Save directory (default: './downloads')

    Returns:
        Markdown summary with statistics, successes, and failures

    Examples:
        paper_batch_download(["10.1038/nature12373", "2301.00001"])
        paper_batch_download(dois, "/papers")
    """
    # Validate input size
    if not identifiers:
        return "# Error\n\nNo identifiers provided. Please provide at least one DOI or URL."
    if len(identifiers) > 50:
        return (
            "# Error\n\n"
            f"Too many identifiers ({len(identifiers)}). "
            "Maximum 50 papers per batch.\n\n"
            "**Suggestion**: Split into multiple smaller batches."
        )

    def _batch_download() -> list[DownloadResult]:
        """Synchronous wrapper for the batch download operation."""
        try:
            client = SciHubClient(email=EMAIL, output_dir=output_dir or DEFAULT_OUTPUT_DIR)  # type: ignore
        except Exception as e:
            # If the client cannot be constructed, report the failure for every
            # identifier instead of raising out of the worker thread.
            return [
                DownloadResult(doi=identifier, success=False, error=str(e))
                for identifier in identifiers
            ]
        results = []
        for i, identifier in enumerate(identifiers):
            try:
                core_result = client.download_paper(identifier)
                results.append(_format_core_result(core_result))
            except Exception as e:
                results.append(
                    DownloadResult(
                        doi=identifier,
                        success=False,
                        error=str(e),
                    )
                )
            # Add a delay between downloads (except after the last one)
            if i < len(identifiers) - 1:
                time.sleep(2)
        return results

    # Run the blocking batch in a worker thread so the event loop stays free
    results = await asyncio.to_thread(_batch_download)
    return format_batch_results(results)
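
# Rough pacing note: with the fixed 2-second inter-download delay above, a batch
# of n identifiers spends 2 * (n - 1) seconds sleeping on top of the downloads
# themselves, e.g. about 98 seconds of pure delay for a full batch of 50.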