Skip to main content
Glama

download_paper

Download academic papers from arXiv by ID to create accessible resources for research and analysis.

Instructions

Download a paper and create a resource for it

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| paper_id | Yes | The arXiv ID of the paper to download | |
| check_status | No | If true, only check conversion status without downloading | |

Implementation Reference

  • The `handle_download` function implements the core logic for the `download_paper` tool. It downloads the arXiv paper PDF, converts it to Markdown using pymupdf4llm, tracks the asynchronous conversion status, and returns JSON status updates.
    # Strong references to in-flight conversion tasks. The asyncio documentation
    # recommends keeping a reference to the result of ``create_task`` so the task
    # cannot disappear mid-execution.
    _background_tasks = set()


    def _json_response(payload: Dict[str, Any]) -> List[types.TextContent]:
        """Wrap a JSON-serializable payload in a single text content item."""
        return [types.TextContent(type="text", text=json.dumps(payload))]


    def _fetch_pdf(paper_id: str, pdf_path: Any) -> bool:
        """Look up ``paper_id`` on arXiv and save its PDF to ``pdf_path``.

        This performs blocking network I/O and is meant to run in a worker
        thread. ``pdf_path`` must expose ``.parent`` and ``.name`` (presumably a
        ``pathlib.Path`` returned by ``get_paper_path`` -- confirm).

        Returns:
            False when the search yields no result, True once the PDF has been
            downloaded.
        """
        client = arxiv.Client()
        # Use a default instead of letting StopIteration escape: StopIteration
        # cannot be propagated through an asyncio Future.
        paper = next(client.results(arxiv.Search(id_list=[paper_id])), None)
        if paper is None:
            return False
        paper.download_pdf(dirpath=pdf_path.parent, filename=pdf_path.name)
        return True


    async def handle_download(arguments: Dict[str, Any]) -> List[types.TextContent]:
        """Handle paper download and conversion requests.

        Args:
            arguments: Tool arguments. ``paper_id`` is required. When the
                optional ``check_status`` is truthy, only the current
                conversion status is reported and nothing is downloaded.

        Returns:
            A single-element list whose text is a JSON object. It always has a
            ``status`` and a ``message`` key; depending on the outcome it may
            also carry ``resource_uri``, ``started_at``, ``completed_at`` and
            ``error``. Failures are reported as ``{"status": "error", ...}``
            rather than raised.
        """
        try:
            paper_id = arguments["paper_id"]
            check_status = arguments.get("check_status", False)

            # Status-only request: report what is known and never download.
            if check_status:
                status = conversion_statuses.get(paper_id)
                if status:
                    return _json_response(
                        {
                            "status": status.status,
                            "started_at": status.started_at.isoformat(),
                            "completed_at": (
                                status.completed_at.isoformat()
                                if status.completed_at
                                else None
                            ),
                            "error": status.error,
                            "message": f"Paper conversion {status.status}",
                        }
                    )

                # No tracked status: fall back to checking for the Markdown file.
                md_path = get_paper_path(paper_id, ".md")
                if md_path.exists():
                    return _json_response(
                        {
                            "status": "success",
                            "message": "Paper is ready",
                            "resource_uri": f"file://{md_path}",
                        }
                    )
                return _json_response(
                    {
                        "status": "unknown",
                        "message": "No download or conversion in progress",
                    }
                )

            # The paper has already been converted.
            md_path = get_paper_path(paper_id, ".md")
            if md_path.exists():
                return _json_response(
                    {
                        "status": "success",
                        "message": "Paper already available",
                        "resource_uri": f"file://{md_path}",
                    }
                )

            # A download or conversion for this paper is already tracked.
            if paper_id in conversion_statuses:
                status = conversion_statuses[paper_id]
                return _json_response(
                    {
                        "status": status.status,
                        "message": f"Paper conversion {status.status}",
                        "started_at": status.started_at.isoformat(),
                    }
                )

            # Start a new download and conversion.
            pdf_path = get_paper_path(paper_id, ".pdf")
            conversion_statuses[paper_id] = ConversionStatus(
                paper_id=paper_id, status="downloading", started_at=datetime.now()
            )

            # Run the blocking arXiv lookup and download in a worker thread so
            # the event loop stays responsive.
            try:
                found = await asyncio.to_thread(_fetch_pdf, paper_id, pdf_path)
            except Exception:
                # Drop the stale "downloading" entry; otherwise every later call
                # would hit the "already in progress" branch and never retry.
                conversion_statuses.pop(paper_id, None)
                raise
            if not found:
                conversion_statuses.pop(paper_id, None)
                return _json_response(
                    {
                        "status": "error",
                        "message": f"Paper {paper_id} not found on arXiv",
                    }
                )

            status = conversion_statuses[paper_id]
            status.status = "converting"

            # Convert in a background thread; the caller polls with check_status.
            task = asyncio.create_task(
                asyncio.to_thread(convert_pdf_to_markdown, paper_id, pdf_path)
            )
            _background_tasks.add(task)
            task.add_done_callback(_background_tasks.discard)

            return _json_response(
                {
                    "status": "converting",
                    "message": "Paper downloaded, conversion started",
                    "started_at": status.started_at.isoformat(),
                }
            )

        except Exception as e:
            return _json_response({"status": "error", "message": f"Error: {str(e)}"})
  • The `download_tool` object defines the tool's metadata, description, and input schema requiring `paper_id` and optional `check_status`.
    # JSON Schema properties accepted by the ``download_paper`` tool.
    _download_paper_properties = {
        "paper_id": {
            "type": "string",
            "description": "The arXiv ID of the paper to download",
        },
        "check_status": {
            "type": "boolean",
            "description": "If true, only check conversion status without downloading",
            "default": False,
        },
    }

    # Tool descriptor: only ``paper_id`` is required; ``check_status`` is optional.
    download_tool = types.Tool(
        name="download_paper",
        description="Download a paper and create a resource for it",
        inputSchema={
            "type": "object",
            "properties": _download_paper_properties,
            "required": ["paper_id"],
        },
    )
  • The `list_tools` method registers `download_tool` as one of the available tools returned to the MCP client.
    @server.list_tools()
    async def list_tools() -> List[types.Tool]:
        """Return the tool descriptors this server exposes."""
        # Order is preserved as-is: search, download, list, read.
        available_tools = [search_tool, download_tool, list_tool, read_tool]
        return available_tools
  • The `call_tool` method dispatches calls to `download_paper` by invoking the `handle_download` function.
    @server.call_tool()
    async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
        """Dispatch a tool call to the handler registered for ``name``.

        Args:
            name: Tool name requested by the client.
            arguments: Arguments forwarded unchanged to the handler.

        Returns:
            The handler's result. For an unknown tool name, or when the handler
            raises, a single text item starting with ``"Error: "`` is returned
            instead of propagating the exception.
        """
        # Lazy %-style arguments avoid formatting when DEBUG logging is off.
        logger.debug("Calling tool %s with arguments %s", name, arguments)
        try:
            handlers = {
                "search_papers": handle_search,
                "download_paper": handle_download,
                "list_papers": handle_list_papers,
                "read_paper": handle_read_paper,
            }
            handler = handlers.get(name)
            if handler is None:
                return [types.TextContent(type="text", text=f"Error: Unknown tool {name}")]
            return await handler(arguments)
        except Exception as e:
            # logger.exception logs at ERROR level and appends the traceback,
            # which a plain logger.error(str(e)) would drop.
            logger.exception("Tool error: %s", e)
            return [types.TextContent(type="text", text=f"Error: {str(e)}")]
  • The tools/__init__.py re-exports `download_tool` and `handle_download` for easy import in server.py.
    from .search import search_tool, handle_search
    from .download import download_tool, handle_download
    from .list_papers import list_tool, handle_list_papers
    from .read_paper import read_tool, handle_read_paper

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/blazickjp/arxiv-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.