Skip to main content
Glama
by fegizii

download_paper_pdf

Download PDFs of academic papers from Semantic Scholar by providing a paper ID. Save files with titles as filenames and set metadata, specifying a custom download directory if needed.

Instructions

Download the PDF of a paper if available, using title as filename and setting metadata. Args: paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.) download_path: Directory to save the PDF (default: ~/Downloads/semantic_scholar_papers) Returns: Status message with download location or error

Input Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| download_path | No | | |
| paper_id | Yes | | |

Input Schema (JSON Schema)

{ "properties": { "download_path": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Download Path" }, "paper_id": { "title": "Paper Id", "type": "string" } }, "required": [ "paper_id" ], "title": "download_paper_pdfArguments", "type": "object" }

Implementation Reference

  • Main execution logic for the 'download_paper_pdf' tool. Fetches paper details from Semantic Scholar API, downloads the open access PDF if available, saves it with a safe filename based on title and year, handles duplicates, optionally sets metadata, and returns success/error message.
    @mcp.tool()
    async def download_paper_pdf(paper_id: str, download_path: Optional[str] = None) -> str:
        """
        Download the PDF of a paper if available, using title as filename and setting metadata.

        Args:
            paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.)
            download_path: Directory to save the PDF
                (default: ~/Downloads/semantic_scholar_papers). A leading "~"
                is expanded to the user's home directory.

        Returns:
            Status message with download location or error. Failures are
            reported through the returned string rather than raised.
        """
        # Get paper info including title, authors, year, and PDF URL
        paper_result = await make_api_request(
            f"paper/{quote(paper_id, safe='')}",
            {"fields": "paperId,title,authors,year,openAccessPdf"},
        )

        if paper_result is None:
            return "Error: Failed to fetch paper information"

        if "error" in paper_result:
            return f"Error: {paper_result['error']}"

        # Check if PDF is available
        open_access = paper_result.get("openAccessPdf")
        if not open_access or not open_access.get("url"):
            return "Error: No open access PDF available for this paper"

        pdf_url = open_access["url"]
        # Use `or` rather than a .get() default: the key can be present with
        # an explicit null, which a default would not replace.
        title = paper_result.get("title") or "Unknown Paper"
        authors = paper_result.get("authors") or []
        year = paper_result.get("year")

        # Set up download path
        if download_path is None:
            download_dir = Path.home() / "Downloads" / "semantic_scholar_papers"
        else:
            # Without expanduser(), "~/papers" would create a literal "~" directory.
            download_dir = Path(download_path).expanduser()

        # Create directory if it doesn't exist; report failure as a message
        # like every other error path instead of letting it escape the tool.
        try:
            download_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            return f"Error saving PDF: {str(e)}"

        # Create filename from title
        safe_title = create_safe_filename(title)
        year_str = f" ({year})" if year else ""
        filename = f"{safe_title}{year_str}.pdf"
        file_path = download_dir / filename

        # Handle duplicate filenames by appending " (1)", " (2)", ...
        counter = 1
        original_file_path = file_path
        while file_path.exists():
            stem = original_file_path.stem
            suffix = original_file_path.suffix
            file_path = original_file_path.parent / f"{stem} ({counter}){suffix}"
            counter += 1

        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                headers = {"User-Agent": "semantic-scholar-mcp/1.0"}
                response = await client.get(
                    pdf_url, headers=headers, follow_redirects=True
                )
                response.raise_for_status()

            # Check if it's actually a PDF; nothing is written when the check fails
            content_type = response.headers.get("content-type", "")
            if "pdf" not in content_type.lower() and not pdf_url.lower().endswith(
                ".pdf"
            ):
                return f"Warning: Downloaded file may not be a PDF (Content-Type: {content_type})"

            # Write the PDF file
            with open(file_path, "wb") as f:
                f.write(response.content)

            file_size = len(response.content) / (1024 * 1024)  # MB

            # Set PDF metadata
            metadata_set = set_pdf_metadata(file_path, title, authors, year)

            # Create author summary for output, skipping entries with no name
            author_names = [
                author.get("name") for author in authors[:3] if author.get("name")
            ]
            author_summary = ", ".join(author_names)
            if len(authors) > 3:
                author_summary += f" and {len(authors) - 3} others"

            result = "✅ PDF downloaded successfully!\n\n"
            result += f"Title: {title}\n"
            result += f"Authors: {author_summary}\n"
            if year:
                result += f"Year: {year}\n"
            result += f"Saved to: {file_path}\n"
            result += f"File size: {file_size:.2f} MB\n"

            if metadata_set:
                result += "✅ PDF metadata set with title, authors, and year"
            else:
                result += "⚠️ PDF saved but metadata not set (install PyPDF2 for metadata support)"

            return result

        except httpx.HTTPError as e:
            return f"Error downloading PDF: {str(e)}"
        except Exception as e:
            # Tool boundary: convert any remaining failure into a status message.
            return f"Error saving PDF: {str(e)}"
  • Utility function to sanitize paper title into a safe PDF filename by removing invalid characters, normalizing spaces, limiting length, and handling empty titles.
    def create_safe_filename(title: str, max_length: int = 100) -> str:
        """Create a safe filename from paper title.

        Args:
            title: Paper title. ``None`` or an empty string is tolerated.
            max_length: Maximum length of the returned name; longer titles are
                cut at the last space that fits within the limit.

        Returns:
            The sanitized title, or ``"Unknown_Paper"`` when nothing usable
            remains.
        """
        if not title:
            return "Unknown_Paper"

        # Remove characters that are forbidden in filenames, plus control
        # characters that are not whitespace (e.g. NUL, which makes open() fail).
        # Whitespace control characters are left for the normalization step so
        # that "a\nb" still becomes "a b".
        safe_title = re.sub(r'[<>:"/\\|?*\x00-\x08\x0e-\x1b\x7f]', "", str(title))
        safe_title = re.sub(r"\s+", " ", safe_title).strip()  # Normalize whitespace

        # Limit length
        if len(safe_title) > max_length:
            # Break at word boundary; with no space in range the hard cut is kept
            safe_title = safe_title[:max_length].rsplit(" ", 1)[0]

        return safe_title if safe_title else "Unknown_Paper"
  • Utility function that embeds PDF metadata (title, authors, year) into the downloaded PDF file using the PyPDF2 library if it is installed; it gracefully handles a missing library or other errors.
    def set_pdf_metadata(
        file_path: Path, title: str, authors: List[Dict], year: Optional[int]
    ):
        """Set PDF metadata using PyPDF2 if available.

        The PDF is read fully into memory, rewritten with the new metadata to a
        sibling temporary file, and only then moved over the original, so a
        failure while writing does not truncate the downloaded PDF.

        Args:
            file_path: Path of the PDF to update.
            title: Value for the ``/Title`` entry.
            authors: Author dicts; the ``"name"`` of up to the first five named
                authors is joined into ``/Author``, followed by " et al." when
                more than five have names.
            year: Publication year; when truthy it is written as a
                ``/CreationDate`` of January 1st of that year.

        Returns:
            True if the metadata was written, False if PyPDF2 is not installed
            or any error occurred.
        """
        try:
            from PyPDF2 import PdfReader, PdfWriter
        except ImportError:
            # PyPDF2 not available - skip metadata setting
            return False

        import io
        import os
        import sys

        file_path = Path(file_path)
        tmp_path = file_path.with_name(file_path.name + ".metadata.tmp")

        try:
            # Read from an in-memory copy so the reader never depends on a file
            # handle that is closed or being overwritten.
            reader = PdfReader(io.BytesIO(file_path.read_bytes()))
            writer = PdfWriter()

            # Copy all pages
            for page in reader.pages:
                writer.add_page(page)

            # Create author string
            author_names = [
                author.get("name", "") for author in (authors or []) if author.get("name")
            ]
            author_str = ", ".join(author_names[:5])  # Limit to first 5 authors
            # Count named authors, not raw entries, so "et al." is only added
            # when names were actually left out.
            if len(author_names) > 5:
                author_str += " et al."

            # Set metadata
            metadata = {
                "/Title": title or "",
                "/Author": author_str,
                "/Creator": "Semantic Scholar MCP",
                "/Producer": "Semantic Scholar MCP",
            }
            if year:
                metadata["/CreationDate"] = f"D:{year}0101000000Z"

            writer.add_metadata(metadata)

            # Write to a temporary file, then replace the original in one step
            with open(tmp_path, "wb") as output_f:
                writer.write(output_f)
            os.replace(tmp_path, file_path)

            return True

        except Exception as e:
            # Error setting metadata - the original file is left untouched.
            # Report on stderr, not stdout: when the server runs over a stdio
            # transport, stdout is reserved for protocol messages.
            print(f"Warning: Could not set PDF metadata: {e}", file=sys.stderr)
            return False
        finally:
            # Best-effort cleanup; the temp file is already gone after a
            # successful replace, so a missing file is expected here.
            try:
                tmp_path.unlink()
            except OSError:
                pass

Other Tools

Related Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/fegizii/SemanticScholarMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.