Skip to main content
Glama
huanongfish

ArXiv MCP Server

by huanongfish

download_paper

Download arXiv research papers by ID to create accessible resources for reading and analysis.

Instructions

Download a paper and create a resource for it

Input Schema

Table | JSON Schema
Name | Required | Description | Default
paper_id | Yes | The arXiv ID of the paper to download |
check_status | No | If true, only check conversion status without downloading |

Implementation Reference

  • Main execution logic for the download_paper tool: downloads arXiv paper PDF, converts to Markdown asynchronously, tracks status, and returns JSON status updates.
    # Strong references to in-flight conversion tasks. The event loop itself only
    # keeps weak references to tasks, so a fire-and-forget task whose handle is
    # dropped may be garbage-collected before it finishes.
    _background_tasks = set()


    def _json_reply(payload: Dict[str, Any]) -> List[types.TextContent]:
        """Wrap *payload* as the single JSON text item a tool handler returns."""
        return [types.TextContent(type="text", text=json.dumps(payload))]


    def _download_pdf(paper_id: str, pdf_path) -> bool:
        """Blocking helper: look up *paper_id* on arXiv and save its PDF to *pdf_path*.

        Returns False when the search yields no result. The "not found" case is
        reported by return value rather than by letting ``StopIteration`` escape,
        because this function runs via ``asyncio.to_thread`` and ``StopIteration``
        cannot be propagated through an asyncio future.
        """
        client = arxiv.Client()
        paper = next(client.results(arxiv.Search(id_list=[paper_id])), None)
        if paper is None:
            return False
        paper.download_pdf(dirpath=pdf_path.parent, filename=pdf_path.name)
        return True


    async def handle_download(arguments: Dict[str, Any]) -> List[types.TextContent]:
        """Handle paper download and conversion requests.

        Args:
            arguments: Tool arguments. ``paper_id`` is required; ``check_status``
                (default ``False``) limits the call to reporting the current
                state without starting a download.

        Returns:
            A one-element list whose text is a JSON object with at least
            ``status`` and ``message``. Depending on the path taken it may also
            carry ``resource_uri``, ``started_at``, ``completed_at`` and
            ``error``. Failures are reported as ``{"status": "error", ...}``
            rather than raised.
        """
        try:
            paper_id = arguments["paper_id"]
            check_status = arguments.get("check_status", False)

            # If only checking status
            if check_status:
                status = conversion_statuses.get(paper_id)
                if not status:
                    # No tracked job: the paper may still exist from an earlier run.
                    if get_paper_path(paper_id, ".md").exists():
                        return _json_reply({
                            "status": "success",
                            "message": "Paper is ready",
                            "resource_uri": f"file://{get_paper_path(paper_id, '.md')}"
                        })
                    return _json_reply({
                        "status": "unknown",
                        "message": "No download or conversion in progress"
                    })

                return _json_reply({
                    "status": status.status,
                    "started_at": status.started_at.isoformat(),
                    "completed_at": status.completed_at.isoformat() if status.completed_at else None,
                    "error": status.error,
                    "message": f"Paper conversion {status.status}"
                })

            # Check if paper is already converted
            if get_paper_path(paper_id, ".md").exists():
                return _json_reply({
                    "status": "success",
                    "message": "Paper already available",
                    "resource_uri": f"file://{get_paper_path(paper_id, '.md')}"
                })

            # Check if already in progress
            if paper_id in conversion_statuses:
                status = conversion_statuses[paper_id]
                return _json_reply({
                    "status": status.status,
                    "message": f"Paper conversion {status.status}",
                    "started_at": status.started_at.isoformat()
                })

            # Start new download and conversion
            pdf_path = get_paper_path(paper_id, ".pdf")
            status = ConversionStatus(
                paper_id=paper_id,
                status="downloading",
                started_at=datetime.now()
            )
            conversion_statuses[paper_id] = status

            # Run the blocking network lookup/download off the event loop. If it
            # fails, is cancelled, or finds nothing, drop the status entry so the
            # paper is not reported as "downloading" forever and can be retried.
            downloaded = False
            try:
                downloaded = await asyncio.to_thread(_download_pdf, paper_id, pdf_path)
            finally:
                if not downloaded:
                    conversion_statuses.pop(paper_id, None)

            if not downloaded:
                return _json_reply({
                    "status": "error",
                    "message": f"Paper {paper_id} not found on arXiv"
                })

            # Update status and start conversion in a worker thread
            status.status = "converting"
            task = asyncio.create_task(
                asyncio.to_thread(convert_pdf_to_markdown, paper_id, pdf_path)
            )
            # Hold a reference until the task completes (see _background_tasks).
            _background_tasks.add(task)
            task.add_done_callback(_background_tasks.discard)

            return _json_reply({
                "status": "converting",
                "message": "Paper downloaded, conversion started",
                "started_at": status.started_at.isoformat()
            })

        except Exception as e:
            return _json_reply({
                "status": "error",
                "message": f"Error: {str(e)}"
            })
  • Tool schema defining name, description, and input validation schema for download_paper.
    # Argument definitions for download_paper, kept separate from the Tool
    # declaration so the schema can be read on its own.
    _download_paper_properties = {
        # Identifier of the paper to fetch; the only mandatory argument.
        "paper_id": {
            "type": "string",
            "description": "The arXiv ID of the paper to download"
        },
        # Optional flag: report conversion status instead of downloading.
        "check_status": {
            "type": "boolean",
            "description": "If true, only check conversion status without downloading",
            "default": False
        }
    }

    # Tool descriptor for download_paper: name, description and input schema.
    download_tool = types.Tool(
        name="download_paper",
        description="Download a paper and create a resource for it",
        inputSchema={
            "type": "object",
            "properties": _download_paper_properties,
            "required": ["paper_id"]
        }
    )
  • Registers the download_paper tool by including download_tool in the list returned by list_tools().
    @server.list_tools()
    async def list_tools() -> List[types.Tool]:
        """Return the tool descriptors this server exposes.

        The list contains the search, download, list and read tools, in that order.
        """
        available = [search_tool, download_tool, list_tool, read_tool]
        return available
  • MCP server call_tool handler that dispatches download_paper calls to the handle_download function.
    @server.call_tool()
    async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
        """Dispatch a tool call to the handler matching ``name``.

        Args:
            name: Tool name requested by the client.
            arguments: Arguments forwarded unchanged to the selected handler.

        Returns:
            The handler's result. For an unrecognised tool name, or when the
            handler raises, a single text item whose text starts with ``Error:``.
        """
        logger.debug(f"Calling tool {name} with arguments {arguments}")
        try:
            # Guard-clause dispatch: each branch returns as soon as it matches.
            if name == "search_papers":
                return await handle_search(arguments)
            if name == "download_paper":
                return await handle_download(arguments)
            if name == "list_papers":
                return await handle_list_papers(arguments)
            if name == "read_paper":
                return await handle_read_paper(arguments)
            return [types.TextContent(type="text", text=f"Error: Unknown tool {name}")]
        except Exception as exc:
            # Handler failures are logged and returned to the client as text
            # instead of propagating out of the dispatcher.
            failure = str(exc)
            logger.error(f"Tool error: {failure}")
            return [types.TextContent(type="text", text=f"Error: {failure}")]
  • Helper function that converts the downloaded PDF to Markdown (run in a worker thread by handle_download), updates the conversion status, and deletes the PDF after a successful conversion.
    def convert_pdf_to_markdown(paper_id: str, pdf_path: Path) -> None:
        """Convert a downloaded PDF to Markdown and record the outcome.

        Synchronous; intended to be run in a separate thread. On success the
        Markdown is stored at ``get_paper_path(paper_id, ".md")``, the tracked
        status (if any) becomes ``"success"``, and the source PDF is removed. On
        failure the tracked status becomes ``"error"`` with the exception text.
        Exceptions are logged and recorded, never raised to the caller.

        Args:
            paper_id: Key into ``conversion_statuses`` and the basis of the
                output path.
            pdf_path: Location of the PDF to convert.
        """
        try:
            logger.info(f"Starting conversion for {paper_id}")
            markdown = pymupdf4llm.to_markdown(pdf_path, show_progress=False)
            md_path = get_paper_path(paper_id, ".md")

            # Write to a sibling temp file and rename it into place, so that
            # code checking ``md_path.exists()`` never sees a half-written file.
            # NOTE(review): assumes get_paper_path returns a pathlib.Path - confirm.
            tmp_path = md_path.with_name(md_path.name + ".tmp")
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(markdown)
            tmp_path.replace(md_path)

        except Exception as e:
            logger.error(f"Conversion failed for {paper_id}: {str(e)}")
            status = conversion_statuses.get(paper_id)
            if status:
                status.status = "error"
                status.completed_at = datetime.now()
                status.error = str(e)
            return

        status = conversion_statuses.get(paper_id)
        if status:
            status.status = "success"
            status.completed_at = datetime.now()

        # Clean up PDF after successful conversion. This is best-effort: the
        # Markdown already exists, so a failed delete must not turn the recorded
        # result into an error.
        try:
            pdf_path.unlink()
        except OSError as e:
            logger.warning(f"Could not remove PDF for {paper_id}: {str(e)}")
        logger.info(f"Conversion completed for {paper_id}")

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/huanongfish/arxiv-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.