download_paper
Download arXiv research papers by ID to create accessible resources for reading and analysis.
Instructions
Download a paper and create a resource for it
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| paper_id | Yes | The arXiv ID of the paper to download | |
| check_status | No | If true, only check conversion status without downloading | false |
Implementation Reference
# Main execution logic for the download_paper tool: downloads arXiv paper PDF,
# converts to Markdown asynchronously, tracks status, and returns JSON status
# updates.

# Strong references to in-flight conversion tasks. The event loop only keeps
# weak references to tasks, so a task whose handle is discarded may be
# garbage-collected before it finishes.
_conversion_tasks = set()


def _json_reply(payload: Dict[str, Any]) -> List[types.TextContent]:
    """Wrap *payload* as the single JSON text item that every branch returns."""
    return [types.TextContent(type="text", text=json.dumps(payload))]


def _fetch_pdf(paper_id: str, pdf_path: Any) -> bool:
    """Look the paper up on arXiv and download its PDF (blocking).

    Returns False when arXiv yields no result for *paper_id*. A ``None``
    default is used instead of letting ``StopIteration`` escape, because
    ``StopIteration`` cannot be propagated through a Future when this runs
    via ``asyncio.to_thread``.
    """
    client = arxiv.Client()
    paper = next(client.results(arxiv.Search(id_list=[paper_id])), None)
    if paper is None:
        return False
    paper.download_pdf(dirpath=pdf_path.parent, filename=pdf_path.name)
    return True


def _mark_failed(status: Any, message: str) -> None:
    """Record a terminal failure on *status* so it is not reported as in progress."""
    status.status = "error"
    status.completed_at = datetime.now()
    status.error = message


async def handle_download(arguments: Dict[str, Any]) -> List[types.TextContent]:
    """Handle paper download and conversion requests.

    Args:
        arguments: Tool arguments. ``paper_id`` (required) is the arXiv ID;
            ``check_status`` (optional, default False) asks only for the
            current conversion status without starting a download.

    Returns:
        A one-element list holding a JSON document with at least a
        ``status`` field ("success", "downloading", "converting", "error"
        or "unknown"). Failures are reported in the JSON payload rather
        than raised.
    """
    try:
        paper_id = arguments["paper_id"]
        check_status = arguments.get("check_status", False)

        # If only checking status
        if check_status:
            status = conversion_statuses.get(paper_id)
            if not status:
                if get_paper_path(paper_id, ".md").exists():
                    return _json_reply({
                        "status": "success",
                        "message": "Paper is ready",
                        "resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
                    })
                return _json_reply({
                    "status": "unknown",
                    "message": "No download or conversion in progress",
                })
            return _json_reply({
                "status": status.status,
                "started_at": status.started_at.isoformat(),
                "completed_at": (
                    status.completed_at.isoformat() if status.completed_at else None
                ),
                "error": status.error,
                "message": f"Paper conversion {status.status}",
            })

        # Check if paper is already converted
        if get_paper_path(paper_id, ".md").exists():
            return _json_reply({
                "status": "success",
                "message": "Paper already available",
                "resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
            })

        # Check if already in progress. A previous attempt that ended in
        # "error" is not in progress, so it falls through and is retried
        # instead of blocking this paper until the server restarts.
        previous = conversion_statuses.get(paper_id)
        if previous is not None and previous.status != "error":
            return _json_reply({
                "status": previous.status,
                "message": f"Paper conversion {previous.status}",
                "started_at": previous.started_at.isoformat(),
            })

        # Start new download and conversion
        pdf_path = get_paper_path(paper_id, ".pdf")
        status = ConversionStatus(
            paper_id=paper_id,
            status="downloading",
            started_at=datetime.now(),
        )
        conversion_statuses[paper_id] = status

        # The arXiv lookup and PDF download are blocking network calls; run
        # them in a worker thread so the event loop keeps serving requests.
        try:
            found = await asyncio.to_thread(_fetch_pdf, paper_id, pdf_path)
        except Exception as e:
            # Do not leave the entry stuck in "downloading" forever.
            _mark_failed(status, str(e))
            raise

        if not found:
            message = f"Paper {paper_id} not found on arXiv"
            _mark_failed(status, message)
            return _json_reply({"status": "error", "message": message})

        # Update status and start conversion in a worker thread.
        status.status = "converting"
        task = asyncio.create_task(
            asyncio.to_thread(convert_pdf_to_markdown, paper_id, pdf_path)
        )
        _conversion_tasks.add(task)
        task.add_done_callback(_conversion_tasks.discard)

        return _json_reply({
            "status": "converting",
            "message": "Paper downloaded, conversion started",
            "started_at": status.started_at.isoformat(),
        })

    except Exception as e:
        return _json_reply({
            "status": "error",
            "message": f"Error: {str(e)}",
        })
# Tool schema defining name, description, and input validation schema for
# download_paper.

# Per-argument JSON Schema entries accepted by the tool.
_download_paper_properties = {
    "paper_id": {
        "type": "string",
        "description": "The arXiv ID of the paper to download",
    },
    "check_status": {
        "type": "boolean",
        "description": "If true, only check conversion status without downloading",
        "default": False,
    },
}

# Only the paper ID is mandatory; check_status falls back to its default.
_download_paper_schema = {
    "type": "object",
    "properties": _download_paper_properties,
    "required": ["paper_id"],
}

download_tool = types.Tool(
    name="download_paper",
    description="Download a paper and create a resource for it",
    inputSchema=_download_paper_schema,
)
# src/arxiv_mcp_server/server.py:41-44 (registration)
# Registers the download_paper tool by including download_tool in the list
# returned by list_tools().
@server.list_tools()
async def list_tools() -> List[types.Tool]:
    """Return the arXiv research tools exposed by this server."""
    available_tools = [
        search_tool,
        download_tool,
        list_tool,
        read_tool,
    ]
    return available_tools
# src/arxiv_mcp_server/server.py:47-64 (registration)
# MCP server call_tool handler that dispatches download_paper calls to the
# handle_download function.
@server.call_tool()
async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
    """Dispatch a tool call to the matching arXiv handler.

    An unknown tool name, or any exception raised by a handler, is returned
    as a single text item starting with "Error:" rather than raised.
    """
    logger.debug(f"Calling tool {name} with arguments {arguments}")
    try:
        # Guard clauses: the first matching tool name wins.
        if name == "search_papers":
            return await handle_search(arguments)
        if name == "download_paper":
            return await handle_download(arguments)
        if name == "list_papers":
            return await handle_list_papers(arguments)
        if name == "read_paper":
            return await handle_read_paper(arguments)

        unknown_reply = types.TextContent(
            type="text", text=f"Error: Unknown tool {name}"
        )
        return [unknown_reply]
    except Exception as exc:
        logger.error(f"Tool error: {str(exc)}")
        failure_reply = types.TextContent(type="text", text=f"Error: {str(exc)}")
        return [failure_reply]
# Helper function to convert downloaded PDF to Markdown format asynchronously,
# updates status, and cleans up PDF.
def convert_pdf_to_markdown(paper_id: str, pdf_path: Path) -> None:
    """Convert PDF to Markdown in a separate thread.

    Writes the Markdown next to the PDF's target path, records the outcome
    on the paper's entry in ``conversion_statuses`` (if one exists), and
    removes the PDF after a successful conversion. Errors are logged and
    stored on the status entry; nothing is raised to the caller.

    Args:
        paper_id: arXiv ID used to resolve the output path and status entry.
        pdf_path: Location of the already-downloaded PDF.
    """
    try:
        logger.info(f"Starting conversion for {paper_id}")
        markdown = pymupdf4llm.to_markdown(pdf_path, show_progress=False)
        md_path = get_paper_path(paper_id, ".md")

        # Write to a sibling temp file and rename it into place, so the final
        # .md path never exists in a half-written state (its existence is
        # what the download handler treats as "paper ready").
        # NOTE(review): assumes get_paper_path returns a pathlib.Path - confirm.
        tmp_path = md_path.with_name(md_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(markdown)
            tmp_path.replace(md_path)
        except Exception:
            tmp_path.unlink(missing_ok=True)
            raise

        status = conversion_statuses.get(paper_id)
        if status:
            status.status = "success"
            status.completed_at = datetime.now()

    except Exception as e:
        logger.error(f"Conversion failed for {paper_id}: {str(e)}")
        status = conversion_statuses.get(paper_id)
        if status:
            status.status = "error"
            status.completed_at = datetime.now()
            status.error = str(e)
        return

    # Clean up PDF after successful conversion. This is best-effort: the
    # Markdown already exists, so a cleanup failure must not turn a
    # successful conversion into an "error" status.
    try:
        pdf_path.unlink()
    except OSError as e:
        logger.warning(f"Could not remove {pdf_path} after conversion: {str(e)}")

    logger.info(f"Conversion completed for {paper_id}")