Skip to main content
Glama

download_paper

Download academic papers from arXiv by ID to create accessible resources for research and analysis.

Instructions

Download a paper and create a resource for it

Input Schema

Table | JSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| paper_id | Yes | The arXiv ID of the paper to download | |
| check_status | No | If true, only check conversion status without downloading | |

Implementation Reference

  • The `handle_download` function implements the core logic for the `download_paper` tool. It downloads the arXiv paper PDF, converts it to Markdown using pymupdf4llm, tracks the asynchronous conversion status, and returns JSON status updates.
    # Strong references to in-flight conversion tasks. The event loop only keeps
    # weak references to tasks, so a task nobody else references can be
    # garbage-collected before it finishes.
    _background_tasks = set()


    def _json_reply(payload: Dict[str, Any]) -> List[types.TextContent]:
        """Wrap a JSON-serialisable payload in a single text content item."""
        return [types.TextContent(type="text", text=json.dumps(payload))]


    def _fetch_pdf(paper_id: str, pdf_path) -> bool:
        """Look up ``paper_id`` on arXiv and save its PDF to ``pdf_path``.

        This function blocks on network I/O and is meant to be run in a worker
        thread. It returns False when the search yields no result instead of
        letting StopIteration escape, because StopIteration cannot be propagated
        through a Future back to the awaiting coroutine.
        """
        client = arxiv.Client()
        paper = next(client.results(arxiv.Search(id_list=[paper_id])), None)
        if paper is None:
            return False
        paper.download_pdf(dirpath=pdf_path.parent, filename=pdf_path.name)
        return True


    async def handle_download(arguments: Dict[str, Any]) -> List[types.TextContent]:
        """Handle paper download and conversion requests.

        Args:
            arguments: Tool arguments. ``paper_id`` is required; when the optional
                ``check_status`` is true, only the current status is reported and
                nothing is downloaded.

        Returns:
            A one-element list whose text is a JSON object with a ``status`` key
            and a ``message``, plus ``resource_uri`` or timing fields depending on
            the branch taken. Failures are reported as ``"status": "error"``
            payloads rather than raised.
        """
        try:
            paper_id = arguments["paper_id"]
            check_status = arguments.get("check_status", False)

            # If only checking status
            if check_status:
                status = conversion_statuses.get(paper_id)
                if not status:
                    if get_paper_path(paper_id, ".md").exists():
                        return _json_reply(
                            {
                                "status": "success",
                                "message": "Paper is ready",
                                "resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
                            }
                        )
                    return _json_reply(
                        {
                            "status": "unknown",
                            "message": "No download or conversion in progress",
                        }
                    )
                return _json_reply(
                    {
                        "status": status.status,
                        "started_at": status.started_at.isoformat(),
                        "completed_at": (
                            status.completed_at.isoformat()
                            if status.completed_at
                            else None
                        ),
                        "error": status.error,
                        "message": f"Paper conversion {status.status}",
                    }
                )

            # Check if paper is already converted
            if get_paper_path(paper_id, ".md").exists():
                return _json_reply(
                    {
                        "status": "success",
                        "message": "Paper already available",
                        "resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
                    }
                )

            # Check if already in progress
            if paper_id in conversion_statuses:
                status = conversion_statuses[paper_id]
                return _json_reply(
                    {
                        "status": status.status,
                        "message": f"Paper conversion {status.status}",
                        "started_at": status.started_at.isoformat(),
                    }
                )

            # Start new download and conversion
            pdf_path = get_paper_path(paper_id, ".pdf")
            status = ConversionStatus(
                paper_id=paper_id, status="downloading", started_at=datetime.now()
            )
            conversion_statuses[paper_id] = status

            # Download in a worker thread so the event loop stays responsive
            # (status checks can be answered while the PDF is being fetched).
            downloaded = False
            try:
                downloaded = await asyncio.to_thread(_fetch_pdf, paper_id, pdf_path)
            finally:
                if not downloaded:
                    # Drop the "downloading" entry on any failure; otherwise every
                    # later request for this ID would be reported as in progress
                    # and the download could never be retried.
                    conversion_statuses.pop(paper_id, None)

            if not downloaded:
                return _json_reply(
                    {
                        "status": "error",
                        "message": f"Paper {paper_id} not found on arXiv",
                    }
                )

            # Update status and start conversion in a thread
            status.status = "converting"
            task = asyncio.create_task(
                asyncio.to_thread(convert_pdf_to_markdown, paper_id, pdf_path)
            )
            _background_tasks.add(task)
            task.add_done_callback(_background_tasks.discard)

            return _json_reply(
                {
                    "status": "converting",
                    "message": "Paper downloaded, conversion started",
                    "started_at": status.started_at.isoformat(),
                }
            )

        except Exception as e:
            return _json_reply({"status": "error", "message": f"Error: {str(e)}"})
  • The `download_tool` object defines the tool's name, description, and input schema, which requires `paper_id` and accepts an optional `check_status`.
    # Descriptor for the "download_paper" tool: its name, human-readable
    # description, and the JSON Schema for its arguments. Only `paper_id` is
    # required; `check_status` is an optional boolean that defaults to False.
    download_tool = types.Tool(
        name="download_paper",
        description="Download a paper and create a resource for it",
        inputSchema=dict(
            type="object",
            properties=dict(
                paper_id=dict(
                    type="string",
                    description="The arXiv ID of the paper to download",
                ),
                check_status=dict(
                    type="boolean",
                    description="If true, only check conversion status without downloading",
                    default=False,
                ),
            ),
            required=["paper_id"],
        ),
    )
  • The `list_tools` method registers `download_tool` as one of the available tools returned to the MCP client.
    @server.list_tools()
    async def list_tools() -> List[types.Tool]:
        """Return the tool descriptors this server advertises.

        The order is search, download, list, read.
        """
        available_tools = [search_tool, download_tool, list_tool, read_tool]
        return available_tools
  • The `call_tool` method dispatches calls to `download_paper` by invoking the `handle_download` function.
    @server.call_tool()
    async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
        """Route a tool invocation to the handler registered for ``name``.

        An unknown tool name, or any exception raised while handling the call,
        is reported back as a single text item beginning with "Error:" instead
        of being raised to the caller.
        """
        logger.debug(f"Calling tool {name} with arguments {arguments}")
        try:
            # Tool name -> coroutine function that services it.
            handlers = {
                "search_papers": handle_search,
                "download_paper": handle_download,
                "list_papers": handle_list_papers,
                "read_paper": handle_read_paper,
            }
            handler = handlers.get(name)
            if handler is None:
                return [types.TextContent(type="text", text=f"Error: Unknown tool {name}")]
            return await handler(arguments)
        except Exception as e:
            logger.error(f"Tool error: {str(e)}")
            return [types.TextContent(type="text", text=f"Error: {str(e)}")]
  • The tools/__init__.py re-exports `download_tool` and `handle_download` for easy import in server.py.
    from .search import search_tool, handle_search from .download import download_tool, handle_download from .list_papers import list_tool, handle_list_papers from .read_paper import read_tool, handle_read_paper

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/blazickjp/arxiv-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.