semantic_scholar_get_paper
Retrieve academic paper details using identifiers like DOI, arXiv ID, or Semantic Scholar ID, with options to include citations and references.
Instructions
Get paper details. Accepts: S2 ID, DOI:xxx, ARXIV:xxx, PMID:xxx, CorpusId:xxx
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| params | Yes | | |
Implementation Reference
# The core handler function for the 'semantic_scholar_get_paper' tool. Fetches
# paper details from Semantic Scholar API by paper_id (supports S2 ID, DOI,
# ArXiv, PMID, CorpusId), optionally includes citations and references,
# supports markdown or JSON output.
def _format_related_papers(heading: str, entries: list, paper_key: str) -> list:
    """Build the Markdown lines for one list of related papers.

    Args:
        heading: Section title, e.g. "Citing Papers" or "References".
        entries: Items from the API "data" list; each is expected to hold the
            related paper under ``paper_key``.
        paper_key: Key of the nested paper dict ("citingPaper" / "citedPaper").

    Returns:
        A list of lines: separator, heading with the entry count, a blank
        line, then one bullet per entry that carries a non-empty paper.
    """
    section = ["---", f"### {heading} ({len(entries)} shown)", ""]
    for entry in entries:
        related = entry.get(paper_key) or {}
        if not related:
            continue
        # Use `or` / `is None` rather than dict.get defaults: a key that is
        # present with a null value would otherwise be rendered as "None".
        title = related.get("title") or "?"
        year = related.get("year")
        year_text = "" if year is None else year
        count = related.get("citationCount") or 0
        section.append(f"- **{title}** ({year_text}) - {count} citations")
    return section


@mcp.tool(name="semantic_scholar_get_paper")
async def get_paper_details(params: PaperDetailsInput) -> str:
    """Get paper details. Accepts: S2 ID, DOI:xxx, ARXIV:xxx, PMID:xxx, CorpusId:xxx"""
    # NOTE(review): the one-line docstring above matches the tool's published
    # "Instructions" text, so it is presumably surfaced as the tool
    # description. It is kept verbatim; details are documented here instead.
    #
    # Flow: fetch the paper, optionally fetch its citations and references
    # (each capped by the matching *_limit field), then return either a JSON
    # dump of everything collected or a Markdown report.
    logger.info("Getting paper: %s", params.paper_id)

    # The same field list is requested on every endpoint; build it once.
    fields = ",".join(PAPER_FIELDS)
    paper = await _make_request(
        "GET", f"paper/{params.paper_id}", params={"fields": fields}
    )
    result = {"paper": paper}

    if params.include_citations:
        cit = await _make_request(
            "GET",
            f"paper/{params.paper_id}/citations",
            params={"fields": fields, "limit": params.citations_limit},
        )
        # `or []` also covers an explicit `"data": null` in the response.
        result["citations"] = cit.get("data") or []

    if params.include_references:
        ref = await _make_request(
            "GET",
            f"paper/{params.paper_id}/references",
            params={"fields": fields, "limit": params.references_limit},
        )
        result["references"] = ref.get("data") or []

    if params.response_format == ResponseFormat.JSON:
        return json.dumps(result, indent=2)

    lines = ["## Paper Details", "", _format_paper_markdown(paper)]
    if result.get("citations"):
        lines.extend(
            _format_related_papers("Citing Papers", result["citations"], "citingPaper")
        )
    if result.get("references"):
        lines.extend(
            _format_related_papers("References", result["references"], "citedPaper")
        )
    return "\n".join(lines)
# Pydantic schema defining input parameters for the semantic_scholar_get_paper
# tool, including paper_id, flags for citations/references, limits, and output
# format.
class PaperDetailsInput(BaseModel):
    # Documented with comments rather than a class docstring so that nothing
    # derived from the model's __doc__ changes.

    # String values are stripped of surrounding whitespace; unknown keys are
    # rejected.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Required, non-empty identifier of the paper to look up.
    paper_id: str = Field(
        default=...,
        description="Paper ID: S2 ID, DOI:xxx, ARXIV:xxx, PMID:xxx, CorpusId:xxx",
        min_length=1,
    )

    # Opt-in switches for the two extra lookups; both are off by default.
    include_citations: bool = Field(False, description="Include citing papers")
    include_references: bool = Field(False, description="Include referenced papers")

    # Caps for the optional lists, each constrained to 1..100.
    citations_limit: int = Field(
        10,
        description="Max citations to return",
        ge=1,
        le=100,
    )
    references_limit: int = Field(
        10,
        description="Max references to return",
        ge=1,
        le=100,
    )

    # Output encoding selector; Markdown unless JSON is requested.
    response_format: ResponseFormat = Field(
        ResponseFormat.MARKDOWN,
        description="Output format",
    )
- Helper function to format paper data into readable Markdown, used in the tool's output.def _format_paper_markdown(paper: Dict[str, Any]) -> str: lines = [] title = paper.get("title", "Unknown Title") year = paper.get("year", "N/A") lines.append(f"### {title} ({year})") authors = paper.get("authors", []) if authors: names = [a.get("name", "?") for a in authors[:5]] if len(authors) > 5: names.append(f"... +{len(authors)-5} more") lines.append(f"**Authors:** {', '.join(names)}") venue = paper.get("venue") or (paper.get("publicationVenue") or {}).get("name") if venue: lines.append(f"**Venue:** {venue}") citations = paper.get("citationCount", 0) influential = paper.get("influentialCitationCount", 0) lines.append(f"**Citations:** {citations} ({influential} influential)") pdf_info = paper.get("openAccessPdf") or {} if pdf_info.get("url"): lines.append(f"**Open Access:** [PDF]({pdf_info['url']})") fields = paper.get("fieldsOfStudy") or [] if fields: lines.append(f"**Fields:** {', '.join(fields[:5])}") tldr = paper.get("tldr") or {} if tldr.get("text"): lines.append(f"**TL;DR:** {tldr['text']}") abstract = paper.get("abstract") if abstract: lines.append(f"**Abstract:** {abstract[:500]}..." if len(abstract) > 500 else f"**Abstract:** {abstract}") ext_ids = paper.get("externalIds") or {} ids = [] if ext_ids.get("DOI"): ids.append(f"DOI: {ext_ids['DOI']}") if ext_ids.get("ArXiv"): ids.append(f"ArXiv: {ext_ids['ArXiv']}") if ext_ids.get("PubMed"): ids.append(f"PMID: {ext_ids['PubMed']}") if ids: lines.append(f"**IDs:** {', '.join(ids)}") if paper.get("url"): lines.append(f"**Link:** [{paper.get('paperId')}]({paper['url']})") lines.append("") return "\n".join(lines)
# Core HTTP request helper to Semantic Scholar API, used by the tool handler
# for fetching paper, citations, and references.
async def _make_request(
    method: str,
    endpoint: str,
    params: Optional[Dict] = None,
    json_body: Optional[Dict] = None,
) -> Dict[str, Any]:
    """Send one request to the API and return the decoded JSON body.

    Args:
        method: HTTP verb. "GET" (any letter case) issues a GET; every other
            value issues a POST.
        endpoint: Path appended to SEMANTIC_SCHOLAR_API_BASE.
        params: Optional query-string parameters.
        json_body: Optional JSON payload; only sent on the POST path.

    Returns:
        The parsed JSON response, or an empty dict when an HTTP status error
        was passed to _handle_error and that call returned normally.

    Raises:
        Exception: "Request timed out" when httpx reports a timeout; the
            original httpx exception is attached as the cause.
    """
    url = f"{SEMANTIC_SCHOLAR_API_BASE}/{endpoint}"
    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        try:
            # Compare case-insensitively so "get" is not silently sent as POST.
            if method.upper() == "GET":
                resp = await client.get(url, params=params, headers=_get_headers())
            else:
                resp = await client.post(
                    url, params=params, json=json_body, headers=_get_headers()
                )
            resp.raise_for_status()
            return resp.json()
        except httpx.HTTPStatusError as exc:
            # NOTE(review): _handle_error presumably raises for the status
            # code; confirm. If it returns, the empty dict below is the result.
            _handle_error(exc.response.status_code)
        except httpx.TimeoutException as exc:
            # Same exception type and message as before; chaining keeps the
            # underlying httpx error visible in tracebacks.
            raise Exception("Request timed out") from exc
    return {}
# Enum used in the tool's input schema for specifying output format (markdown
# or json).
class ResponseFormat(str, Enum):
    # Members also subclass str, so they compare equal to their plain string
    # values. Documented with comments rather than a docstring so that nothing
    # derived from the enum's __doc__ changes.

    # Human-readable Markdown report.
    MARKDOWN = "markdown"

    # Raw JSON dump of the collected data.
    JSON = "json"