download_paper_pdf

Instructions

Download the PDF of a paper if available, using the paper title as the filename and setting PDF metadata.

Args:
- paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.)
- download_path: Directory to save the PDF (default: ~/Downloads/semantic_scholar_papers)

Returns: Status message with the download location, or an error.

Input Schema

Table | JSON Schema

Name	Required	Description	Default
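`paper_id`	Yes	Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.)	
`download_path`	No	Directory to save the PDF	~/Downloads/semantic_scholar_papers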

Implementation Reference

src/semantic_scholar_mcp/server.py:645-753 (handler)
The core handler function for the 'download_paper_pdf' tool. It is registered via the @mcp.tool() decorator. Fetches paper metadata, downloads the open access PDF if available, generates a safe filename from the title, handles duplicates, optionally sets PDF metadata using PyPDF2, and returns a success message with file details.
@mcp.tool()
async def download_paper_pdf(paper_id: str, download_path: Optional[str] = None) -> str:
    """
    Download the PDF of a paper if available, using title as filename and setting metadata.

    Args:
        paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.)
        download_path: Directory to save the PDF
            (default: ~/Downloads/semantic_scholar_papers). A leading "~" is
            expanded to the user's home directory.

    Returns:
        Status message with download location or error
    """
    # Get paper info including title, authors, year, and PDF URL
    paper_result = await make_api_request(
        f"paper/{quote(paper_id, safe='')}",
        {"fields": "paperId,title,authors,year,openAccessPdf"},
    )

    if paper_result is None:
        return "Error: Failed to fetch paper information"

    if "error" in paper_result:
        return f"Error: {paper_result['error']}"

    # Check if PDF is available
    open_access = paper_result.get("openAccessPdf")
    if not open_access or not open_access.get("url"):
        return "Error: No open access PDF available for this paper"

    pdf_url = open_access["url"]
    # `.get(key, default)` only applies the default when the key is absent;
    # `or` also covers a key that is present with a null/empty value.
    title = paper_result.get("title") or "Unknown Paper"
    authors = paper_result.get("authors") or []
    year = paper_result.get("year")

    # Set up download path
    if download_path is None:
        download_dir = Path.home() / "Downloads" / "semantic_scholar_papers"
    else:
        # Without expanduser() a path such as "~/papers" would create a
        # directory literally named "~" under the current working directory.
        download_dir = Path(download_path).expanduser()

    # Create directory if it doesn't exist; report failures as a status
    # message, like every other error path in this tool.
    try:
        download_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        return f"Error saving PDF: {str(e)}"

    # Create filename from title
    safe_title = create_safe_filename(title)
    year_str = f" ({year})" if year else ""
    filename = f"{safe_title}{year_str}.pdf"
    file_path = download_dir / filename

    # Handle duplicate filenames by appending " (1)", " (2)", ... to the stem
    counter = 1
    original_file_path = file_path
    while file_path.exists():
        stem = original_file_path.stem
        suffix = original_file_path.suffix
        file_path = original_file_path.parent / f"{stem} ({counter}){suffix}"
        counter += 1

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            headers = {"User-Agent": "semantic-scholar-mcp/1.0"}
            response = await client.get(
                pdf_url, headers=headers, follow_redirects=True
            )
            response.raise_for_status()

            # Check if it's actually a PDF: accept on Content-Type, on the URL
            # extension, or on the "%PDF-" signature near the start of the body.
            content_type = response.headers.get("content-type", "")
            looks_like_pdf = (
                "pdf" in content_type.lower()
                or pdf_url.lower().endswith(".pdf")
                or b"%PDF-" in response.content[:1024]
            )
            if not looks_like_pdf:
                # Nothing has been written at this point, so say so explicitly.
                return (
                    "Error: Downloaded content does not appear to be a PDF "
                    f"(Content-Type: {content_type}); nothing was saved"
                )

            # Write the PDF file
            with open(file_path, "wb") as f:
                f.write(response.content)

            file_size = len(response.content) / (1024 * 1024)  # MB

            # Set PDF metadata (returns False when it could not be set)
            metadata_set = set_pdf_metadata(file_path, title, authors, year)

            # Create author summary for output; `or ""` keeps a null name from
            # breaking the join after the file has already been saved.
            author_names = [author.get("name") or "" for author in authors[:3]]
            author_summary = ", ".join(author_names)
            if len(authors) > 3:
                author_summary += f" and {len(authors) - 3} others"

            result = "✅ PDF downloaded successfully!\n\n"
            result += f"Title: {title}\n"
            result += f"Authors: {author_summary}\n"
            if year:
                result += f"Year: {year}\n"
            result += f"Saved to: {file_path}\n"
            result += f"File size: {file_size:.2f} MB\n"

            if metadata_set:
                result += "✅ PDF metadata set with title, authors, and year"
            else:
                result += "⚠️ PDF saved but metadata not set (install PyPDF2 for metadata support)"

            return result

    except httpx.HTTPError as e:
        return f"Error downloading PDF: {str(e)}"
    except Exception as e:
        return f"Error saving PDF: {str(e)}"
src/semantic_scholar_mcp/server.py:579-591 (helper)
Helper function to generate a safe filename from the paper title by removing invalid characters, normalizing whitespace, and truncating to a maximum length.
def create_safe_filename(title: str, max_length: int = 100) -> str:
    """Create a safe filename from paper title.

    Args:
        title: Paper title. ``None`` or an empty string yields the fallback name.
        max_length: Maximum length of the returned name. Longer titles are cut
            at the last space inside the limit when there is one.

    Returns:
        The sanitised title, or ``"Unknown_Paper"`` when nothing usable remains.
    """
    if not title:
        return "Unknown_Paper"

    # Remove characters that are forbidden in filenames, plus control
    # characters that are not whitespace (a NUL byte makes open() raise).
    # Whitespace-like controls are left for the normalisation step below so
    # they still become a single space.
    safe_title = re.sub(r'[<>:"/\\|?*\x00-\x08\x0e-\x1b\x7f]', "", title)
    safe_title = re.sub(r"\s+", " ", safe_title)  # Normalize whitespace
    safe_title = safe_title.strip()

    # Limit length
    if len(safe_title) > max_length:
        # Break at word boundary
        safe_title = safe_title[:max_length].rsplit(" ", 1)[0]

    return safe_title if safe_title else "Unknown_Paper"
src/semantic_scholar_mcp/server.py:593-643 (helper)
Helper function to set PDF metadata (title, authors, year) using PyPDF2 if installed, gracefully handling missing library or errors.
def set_pdf_metadata(
    file_path: Path, title: str, authors: List[Dict], year: Optional[int]
) -> bool:
    """Set PDF metadata using PyPDF2 if available.

    The updated PDF is written to a temporary file next to ``file_path`` and
    then moved over the original, so a failure while writing does not leave
    the downloaded PDF truncated.

    Args:
        file_path: Path of the PDF file to update.
        title: Value for the ``/Title`` entry.
        authors: Author dicts; the ``"name"`` value of up to five of them is
            joined into ``/Author``. ``None`` is treated as an empty list.
        year: Publication year; when truthy it is written as ``/CreationDate``
            (January 1st of that year).

    Returns:
        True if the metadata was written, False if PyPDF2 is not installed or
        any error occurred (the original file is left in place).
    """
    import os
    import shutil
    import sys
    import tempfile

    try:
        from PyPDF2 import PdfReader, PdfWriter
    except ImportError:
        # PyPDF2 not available - skip metadata setting
        return False

    authors = authors or []
    tmp_name = None
    try:
        # Read the existing PDF
        with open(file_path, "rb") as f:
            reader = PdfReader(f)
            writer = PdfWriter()

            # Copy all pages
            for page in reader.pages:
                writer.add_page(page)

            # Create author string
            author_names = [
                author.get("name", "") for author in authors if author.get("name")
            ]
            author_str = ", ".join(author_names[:5])  # Limit to first 5 authors
            if len(authors) > 5:
                author_str += " et al."

            # Set metadata
            metadata = {
                "/Title": title or "",
                "/Author": author_str,
                "/Creator": "Semantic Scholar MCP",
                "/Producer": "Semantic Scholar MCP",
            }
            if year:
                metadata["/CreationDate"] = f"D:{year}0101000000Z"

            writer.add_metadata(metadata)

            # Write to a sibling temp file while the source is still open:
            # the reader may still need to pull page data from it during the
            # write (presumably lazily - not verified here), and truncating
            # the source in place would destroy that data.
            target_dir = os.path.dirname(os.fspath(file_path)) or "."
            fd, tmp_name = tempfile.mkstemp(dir=target_dir, suffix=".pdf.tmp")
            with os.fdopen(fd, "wb") as output_f:
                writer.write(output_f)

        # mkstemp creates the file with owner-only permissions; keep the
        # permissions the downloaded file had.
        shutil.copymode(file_path, tmp_name)
        # Swap the finished file in only after the source handle is closed.
        os.replace(tmp_name, file_path)
        tmp_name = None
        return True

    except Exception as e:
        # Error setting metadata - file is still saved.
        # Written to stderr: if this server is run over a stdio transport,
        # stdout carries protocol messages and must not receive stray text.
        print(f"Warning: Could not set PDF metadata: {e}", file=sys.stderr)
        return False
    finally:
        # Remove the temp file if it was created but never moved into place.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass

Semantic Scholar MCP Server

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API