Semantic Scholar MCP Server

Overview Inspect Schema Related Servers Score Discussions

semantic-scholar-mcp
src
semantic_scholar_mcp

server.py•21.1 kB

""" Semantic Scholar MCP Server =========================== Production MCP server providing direct access to Semantic Scholar's database of 200M+ academic papers within Claude Desktop. Tools Provided: - semantic_scholar_search_papers: Advanced paper search with filters - semantic_scholar_get_paper: Full paper details with citations/references - semantic_scholar_search_authors: Find researchers by name - semantic_scholar_get_author: Author profiles and publications - semantic_scholar_recommendations: AI-powered related paper discovery - semantic_scholar_bulk_papers: Batch retrieval (up to 500 papers) Configuration: Set SEMANTIC_SCHOLAR_API_KEY environment variable with your API key. Get a free key at: https://www.semanticscholar.org/product/api Author: Santiago Maniches - ORCID: https://orcid.org/0009-0005-6480-1987 - LinkedIn: https://www.linkedin.com/in/santiago-maniches/ Organization: TOPOLOGICA LLC - Website: https://topologica.ai - Email: santiago@topologica.ai License: MIT Repository: https://github.com/smaniches/semantic-scholar-mcp Copyright (c) 2025 TOPOLOGICA LLC. All rights reserved. 
""" from __future__ import annotations import json import logging import os from datetime import datetime, timezone from enum import Enum from typing import Any, Dict, List, Optional import httpx from mcp.server.fastmcp import FastMCP from pydantic import BaseModel, ConfigDict, Field # ═══════════════════════════════════════════════════════════════════════════════ # CONFIGURATION # ═══════════════════════════════════════════════════════════════════════════════ # API Key: Set via environment variable (user provides their own key) # Get free key at: https://www.semanticscholar.org/product/api SEMANTIC_SCHOLAR_API_KEY: str = os.environ.get("SEMANTIC_SCHOLAR_API_KEY", "") SEMANTIC_SCHOLAR_API_BASE: str = "https://api.semanticscholar.org/graph/v1" DEFAULT_TIMEOUT: float = 30.0 # Field sets for comprehensive paper metadata PAPER_FIELDS: List[str] = [ "paperId", "corpusId", "url", "title", "abstract", "venue", "publicationVenue", "year", "referenceCount", "citationCount", "influentialCitationCount", "isOpenAccess", "openAccessPdf", "fieldsOfStudy", "s2FieldsOfStudy", "publicationTypes", "publicationDate", "journal", "citationStyles", "authors", "externalIds", "tldr" ] AUTHOR_FIELDS: List[str] = [ "authorId", "externalIds", "url", "name", "aliases", "affiliations", "homepage", "paperCount", "citationCount", "hIndex" ] # Logging logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s") logger = logging.getLogger("semantic_scholar_mcp") # ═══════════════════════════════════════════════════════════════════════════════ # MCP SERVER # ═══════════════════════════════════════════════════════════════════════════════ mcp = FastMCP( "semantic_scholar_mcp", instructions=""" Semantic Scholar MCP Server - Access 200M+ academic papers. Created by Santiago Maniches (ORCID: 0009-0005-6480-1987) TOPOLOGICA LLC - https://topologica.ai Supports DOI, ArXiv, PubMed, ACL, and Semantic Scholar IDs. 
""" ) # ═══════════════════════════════════════════════════════════════════════════════ # PYDANTIC INPUT MODELS # ═══════════════════════════════════════════════════════════════════════════════ class ResponseFormat(str, Enum): MARKDOWN = "markdown" JSON = "json" class PaperSearchInput(BaseModel): model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") query: str = Field(..., description="Search query", min_length=1, max_length=500) year: Optional[str] = Field(default=None, description="Year filter: '2024', '2020-2024', '2020-'") fields_of_study: Optional[List[str]] = Field(default=None, description="Filter by fields: ['Computer Science', 'Biology']") publication_types: Optional[List[str]] = Field(default=None, description="Filter: 'Review', 'JournalArticle'") open_access_only: bool = Field(default=False, description="Only return open access papers") min_citation_count: Optional[int] = Field(default=None, description="Minimum citations", ge=0) limit: int = Field(default=10, description="Max results (1-100)", ge=1, le=100) offset: int = Field(default=0, description="Pagination offset", ge=0) response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") class PaperDetailsInput(BaseModel): model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") paper_id: str = Field(..., description="Paper ID: S2 ID, DOI:xxx, ARXIV:xxx, PMID:xxx, CorpusId:xxx", min_length=1) include_citations: bool = Field(default=False, description="Include citing papers") include_references: bool = Field(default=False, description="Include referenced papers") citations_limit: int = Field(default=10, description="Max citations to return", ge=1, le=100) references_limit: int = Field(default=10, description="Max references to return", ge=1, le=100) response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") class AuthorSearchInput(BaseModel): model_config = ConfigDict(str_strip_whitespace=True, 
extra="forbid") query: str = Field(..., description="Author name to search", min_length=1, max_length=200) limit: int = Field(default=10, description="Max results", ge=1, le=100) offset: int = Field(default=0, description="Pagination offset", ge=0) response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") class AuthorDetailsInput(BaseModel): model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") author_id: str = Field(..., description="Semantic Scholar author ID", min_length=1) include_papers: bool = Field(default=True, description="Include publications") papers_limit: int = Field(default=20, description="Max papers to return", ge=1, le=100) response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") class PaperRecommendationsInput(BaseModel): model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") paper_id: str = Field(..., description="Seed paper ID for recommendations", min_length=1) limit: int = Field(default=10, description="Max recommendations", ge=1, le=100) response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format") class BulkPaperInput(BaseModel): model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") paper_ids: List[str] = Field(..., description="List of paper IDs (max 500)", min_length=1, max_length=500) response_format: ResponseFormat = Field(default=ResponseFormat.JSON, description="Output format") # ═══════════════════════════════════════════════════════════════════════════════ # HTTP CLIENT # ═══════════════════════════════════════════════════════════════════════════════ def _get_headers() -> Dict[str, str]: headers = {"Accept": "application/json", "Content-Type": "application/json"} if SEMANTIC_SCHOLAR_API_KEY: headers["x-api-key"] = SEMANTIC_SCHOLAR_API_KEY return headers async def _make_request( method: str, endpoint: str, params: Optional[Dict] = None, json_body: Optional[Dict] = 
None ) -> Dict[str, Any]: url = f"{SEMANTIC_SCHOLAR_API_BASE}/{endpoint}" async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client: try: if method == "GET": resp = await client.get(url, params=params, headers=_get_headers()) else: resp = await client.post(url, params=params, json=json_body, headers=_get_headers()) resp.raise_for_status() return resp.json() except httpx.HTTPStatusError as e: _handle_error(e.response.status_code) except httpx.TimeoutException: raise Exception("Request timed out") return {} def _handle_error(status: int) -> None: errors = { 400: "Bad request. Check syntax.", 401: "Auth failed. Set SEMANTIC_SCHOLAR_API_KEY env var.", 403: "Forbidden. Check API key.", 404: "Not found. Check ID format.", 429: "Rate limited. Wait and retry.", 500: "Server error. Try later.", 503: "Service unavailable." } raise Exception(f"API Error ({status}): {errors.get(status, 'Unknown')}") # ═══════════════════════════════════════════════════════════════════════════════ # FORMATTING UTILITIES # ═══════════════════════════════════════════════════════════════════════════════ def _format_paper_markdown(paper: Dict[str, Any]) -> str: lines = [] title = paper.get("title", "Unknown Title") year = paper.get("year", "N/A") lines.append(f"### {title} ({year})") authors = paper.get("authors", []) if authors: names = [a.get("name", "?") for a in authors[:5]] if len(authors) > 5: names.append(f"... 
+{len(authors)-5} more") lines.append(f"**Authors:** {', '.join(names)}") venue = paper.get("venue") or (paper.get("publicationVenue") or {}).get("name") if venue: lines.append(f"**Venue:** {venue}") citations = paper.get("citationCount", 0) influential = paper.get("influentialCitationCount", 0) lines.append(f"**Citations:** {citations} ({influential} influential)") pdf_info = paper.get("openAccessPdf") or {} if pdf_info.get("url"): lines.append(f"**Open Access:** [PDF]({pdf_info['url']})") fields = paper.get("fieldsOfStudy") or [] if fields: lines.append(f"**Fields:** {', '.join(fields[:5])}") tldr = paper.get("tldr") or {} if tldr.get("text"): lines.append(f"**TL;DR:** {tldr['text']}") abstract = paper.get("abstract") if abstract: lines.append(f"**Abstract:** {abstract[:500]}..." if len(abstract) > 500 else f"**Abstract:** {abstract}") ext_ids = paper.get("externalIds") or {} ids = [] if ext_ids.get("DOI"): ids.append(f"DOI: {ext_ids['DOI']}") if ext_ids.get("ArXiv"): ids.append(f"ArXiv: {ext_ids['ArXiv']}") if ext_ids.get("PubMed"): ids.append(f"PMID: {ext_ids['PubMed']}") if ids: lines.append(f"**IDs:** {', '.join(ids)}") if paper.get("url"): lines.append(f"**Link:** [{paper.get('paperId')}]({paper['url']})") lines.append("") return "\n".join(lines) def _format_author_markdown(author: Dict[str, Any]) -> str: lines = [f"### {author.get('name', 'Unknown')}"] affiliations = author.get("affiliations") or [] if affiliations: lines.append(f"**Affiliations:** {', '.join(affiliations[:3])}") lines.append(f"**h-index:** {author.get('hIndex')} | **Papers:** {author.get('paperCount', 0)} | **Citations:** {author.get('citationCount', 0)}") if author.get("homepage"): lines.append(f"**Homepage:** {author['homepage']}") if author.get("url"): lines.append(f"**Profile:** [{author.get('authorId')}]({author['url']})") lines.append("") return "\n".join(lines) # ═══════════════════════════════════════════════════════════════════════════════ # MCP TOOLS # 
═══════════════════════════════════════════════════════════════════════════════ @mcp.tool(name="semantic_scholar_search_papers") async def search_papers(params: PaperSearchInput) -> str: """Search for academic papers. Supports boolean operators (AND, OR, NOT), phrase search with quotes.""" logger.info(f"Searching: {params.query}") api_params = {"query": params.query, "offset": params.offset, "limit": params.limit, "fields": ",".join(PAPER_FIELDS)} if params.year: api_params["year"] = params.year if params.fields_of_study: api_params["fieldsOfStudy"] = ",".join(params.fields_of_study) if params.publication_types: api_params["publicationTypes"] = ",".join(params.publication_types) if params.open_access_only: api_params["openAccessPdf"] = "" if params.min_citation_count: api_params["minCitationCount"] = params.min_citation_count response = await _make_request("GET", "paper/search", params=api_params) total, papers = response.get("total", 0), response.get("data", []) if params.response_format == ResponseFormat.JSON: return json.dumps({"query": params.query, "total": total, "papers": papers}, indent=2) lines = [f"## Search Results: \"{params.query}\"", f"**Found:** {total} papers (showing {params.offset + 1}-{params.offset + len(papers)})", ""] for paper in papers: lines.append(_format_paper_markdown(paper)) if total > params.offset + len(papers): lines.append(f"*Use offset={params.offset + params.limit} to see more results*") return "\n".join(lines) @mcp.tool(name="semantic_scholar_get_paper") async def get_paper_details(params: PaperDetailsInput) -> str: """Get paper details. 
Accepts: S2 ID, DOI:xxx, ARXIV:xxx, PMID:xxx, CorpusId:xxx""" logger.info(f"Getting paper: {params.paper_id}") paper = await _make_request("GET", f"paper/{params.paper_id}", params={"fields": ",".join(PAPER_FIELDS)}) result = {"paper": paper} if params.include_citations: cit = await _make_request("GET", f"paper/{params.paper_id}/citations", params={"fields": ",".join(PAPER_FIELDS), "limit": params.citations_limit}) result["citations"] = cit.get("data", []) if params.include_references: ref = await _make_request("GET", f"paper/{params.paper_id}/references", params={"fields": ",".join(PAPER_FIELDS), "limit": params.references_limit}) result["references"] = ref.get("data", []) if params.response_format == ResponseFormat.JSON: return json.dumps(result, indent=2) lines = ["## Paper Details", "", _format_paper_markdown(paper)] if result.get("citations"): lines.extend(["---", f"### Citing Papers ({len(result['citations'])} shown)", ""]) for c in result["citations"]: p = c.get("citingPaper", {}) if p: lines.append(f"- **{p.get('title', '?')}** ({p.get('year', '')}) - {p.get('citationCount', 0)} citations") if result.get("references"): lines.extend(["---", f"### References ({len(result['references'])} shown)", ""]) for r in result["references"]: p = r.get("citedPaper", {}) if p: lines.append(f"- **{p.get('title', '?')}** ({p.get('year', '')}) - {p.get('citationCount', 0)} citations") return "\n".join(lines) @mcp.tool(name="semantic_scholar_search_authors") async def search_authors(params: AuthorSearchInput) -> str: """Search for academic authors by name.""" logger.info(f"Searching authors: {params.query}") response = await _make_request("GET", "author/search", params={"query": params.query, "offset": params.offset, "limit": params.limit, "fields": ",".join(AUTHOR_FIELDS)}) total, authors = response.get("total", 0), response.get("data", []) if params.response_format == ResponseFormat.JSON: return json.dumps({"query": params.query, "total": total, "authors": authors}, 
indent=2) lines = [f"## Author Search: \"{params.query}\"", f"**Found:** {total} authors", ""] for author in authors: lines.append(_format_author_markdown(author)) return "\n".join(lines) @mcp.tool(name="semantic_scholar_get_author") async def get_author_details(params: AuthorDetailsInput) -> str: """Get author profile with optional publications list.""" logger.info(f"Getting author: {params.author_id}") author = await _make_request("GET", f"author/{params.author_id}", params={"fields": ",".join(AUTHOR_FIELDS)}) result = {"author": author} if params.include_papers: papers = await _make_request("GET", f"author/{params.author_id}/papers", params={"fields": ",".join(PAPER_FIELDS), "limit": params.papers_limit}) result["papers"] = papers.get("data", []) if params.response_format == ResponseFormat.JSON: return json.dumps(result, indent=2) lines = ["## Author Profile", "", _format_author_markdown(author)] if result.get("papers"): lines.extend(["---", f"### Publications ({len(result['papers'])} shown)", ""]) for p in result["papers"]: lines.append(f"- **{p.get('title', '?')}** ({p.get('year', '')}) - {p.get('citationCount', 0)} citations") return "\n".join(lines) @mcp.tool(name="semantic_scholar_recommendations") async def get_recommendations(params: PaperRecommendationsInput) -> str: """Get paper recommendations based on a seed paper.""" logger.info(f"Recommendations for: {params.paper_id}") async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client: resp = await client.post( f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{params.paper_id}", params={"fields": ",".join(PAPER_FIELDS), "limit": params.limit}, json={"positivePaperIds": [params.paper_id]}, headers=_get_headers() ) resp.raise_for_status() data = resp.json() papers = data.get("recommendedPapers", []) if params.response_format == ResponseFormat.JSON: return json.dumps({"seed": params.paper_id, "recommendations": papers}, indent=2) lines = [f"## Recommendations", f"**Seed:** 
{params.paper_id}", f"**Found:** {len(papers)}", ""] for paper in papers: lines.append(_format_paper_markdown(paper)) return "\n".join(lines) @mcp.tool(name="semantic_scholar_bulk_papers") async def get_bulk_papers(params: BulkPaperInput) -> str: """Retrieve multiple papers in a single request (max 500).""" logger.info(f"Bulk retrieval: {len(params.paper_ids)} papers") response = await _make_request("POST", "paper/batch", params={"fields": ",".join(PAPER_FIELDS)}, json_body={"ids": params.paper_ids}) papers = response if isinstance(response, list) else response.get("data", []) if params.response_format == ResponseFormat.JSON: return json.dumps({"requested": len(params.paper_ids), "retrieved": len(papers), "papers": papers}, indent=2) lines = [f"## Bulk Retrieval", f"**Requested:** {len(params.paper_ids)} | **Retrieved:** {len(papers)}", ""] for paper in papers: if paper: lines.append(_format_paper_markdown(paper)) return "\n".join(lines) # ═══════════════════════════════════════════════════════════════════════════════ # ENTRY POINT # ═══════════════════════════════════════════════════════════════════════════════ def main(): """Run the MCP server.""" if not SEMANTIC_SCHOLAR_API_KEY: logger.warning("SEMANTIC_SCHOLAR_API_KEY not set. Using rate-limited public access (1 req/sec).") mcp.run() if __name__ == "__main__": main()

Implementation Reference

Latest Blog Posts

Model Context Protocol Proxies: Enabling Enterprise Control with Virtual MCPs
By Om-Shree-0709 on December 9, 2025.
AI Security
Virtual MCP
Kubernetes Operator
The State of MCP in 2025: Who's Building What and Why It Matters
By punkpeye on December 7, 2025.
mcp
startups
MCP hosting with persistent storage
By punkpeye on December 6, 2025.
changelog

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/smaniches/semantic-scholar-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.