@mcp.tool()
async def download_paper_pdf(paper_id: str, download_path: Optional[str] = None) -> str:
    """
    Download a paper's open-access PDF, naming the file after the paper title.

    Looks the paper up via ``make_api_request``, fetches the URL found under
    ``openAccessPdf``, writes the bytes to disk and then calls
    ``set_pdf_metadata`` on the saved file.

    Args:
        paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.)
        download_path: Directory to save the PDF; a leading ``~`` is expanded
            (default: ~/Downloads/semantic_scholar_papers)

    Returns:
        Status message with download location or error. Download and save
        failures are reported in the returned string rather than raised.
    """
    # Get paper info including title, authors, year, and PDF URL
    paper_result = await make_api_request(
        f"paper/{quote(paper_id, safe='')}",
        {"fields": "paperId,title,authors,year,openAccessPdf"},
    )
    if paper_result is None:
        return "Error: Failed to fetch paper information"
    if "error" in paper_result:
        return f"Error: {paper_result['error']}"

    # Check if PDF is available
    open_access = paper_result.get("openAccessPdf")
    if not open_access or not open_access.get("url"):
        return "Error: No open access PDF available for this paper"
    pdf_url = open_access["url"]

    # Use `or` instead of a .get() default: a key that is present but null
    # would otherwise hand None to the filename/author handling below.
    title = paper_result.get("title") or "Unknown Paper"
    authors = paper_result.get("authors") or []
    year = paper_result.get("year")

    # --- Download phase: any failure here is a download error, not a save error.
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            headers = {"User-Agent": "semantic-scholar-mcp/1.0"}
            response = await client.get(pdf_url, headers=headers, follow_redirects=True)
            response.raise_for_status()
    except Exception as e:  # tool boundary: report instead of raising
        # Some exceptions (e.g. timeouts) stringify to "", so fall back to the type name.
        return f"Error downloading PDF: {str(e) or type(e).__name__}"

    content = response.content

    # Check if it's actually a PDF. Besides the declared Content-Type and the
    # URL suffix, accept a body that carries the "%PDF-" signature near its
    # start, so PDFs served with a generic Content-Type are not rejected.
    content_type = response.headers.get("content-type", "")
    looks_like_pdf = (
        "pdf" in content_type.lower()
        or pdf_url.lower().endswith(".pdf")
        or b"%PDF-" in content[:1024]
    )
    if not looks_like_pdf:
        # Nothing is written to disk on this path.
        return f"Warning: Downloaded file may not be a PDF (Content-Type: {content_type})"

    # --- Save phase: the directory is only created once there is something to store.
    try:
        if download_path is None:
            download_dir = Path.home() / "Downloads" / "semantic_scholar_papers"
        else:
            download_dir = Path(download_path).expanduser()
        download_dir.mkdir(parents=True, exist_ok=True)

        # Create filename from title
        safe_title = create_safe_filename(title)
        year_str = f" ({year})" if year else ""
        original_file_path = download_dir / f"{safe_title}{year_str}.pdf"

        # Handle duplicate filenames by appending " (1)", " (2)", ...
        stem = original_file_path.stem
        suffix = original_file_path.suffix
        file_path = original_file_path
        counter = 1
        while file_path.exists():
            file_path = original_file_path.parent / f"{stem} ({counter}){suffix}"
            counter += 1

        file_path.write_bytes(content)

        # Set PDF metadata
        metadata_set = set_pdf_metadata(file_path, title, authors, year)
    except Exception as e:  # tool boundary: report instead of raising
        return f"Error saving PDF: {str(e) or type(e).__name__}"

    file_size = len(content) / (1024 * 1024)  # MB

    # Create author summary for output: first three names, then a count of the rest.
    shown_names = [
        author.get("name") for author in authors[:3] if isinstance(author, dict)
    ]
    author_summary = ", ".join(name for name in shown_names if name)
    if len(authors) > 3:
        author_summary += f" and {len(authors) - 3} others"

    # The first entry keeps its own trailing newline so that the join below
    # leaves a blank line after the headline.
    lines = [
        "✅ PDF downloaded successfully!\n",
        f"Title: {title}",
        f"Authors: {author_summary}",
    ]
    if year:
        lines.append(f"Year: {year}")
    lines.append(f"Saved to: {file_path}")
    lines.append(f"File size: {file_size:.2f} MB")
    if metadata_set:
        lines.append("✅ PDF metadata set with title, authors, and year")
    else:
        lines.append(
            "⚠️ PDF saved but metadata not set (install PyPDF2 for metadata support)"
        )
    return "\n".join(lines)