Skip to main content
Glama
smaniches

Semantic Scholar MCP Server

by smaniches

semantic_scholar_search_papers

Search academic papers using Semantic Scholar's database with boolean operators, year filters, field restrictions, and citation thresholds to find relevant research.

Instructions

Search for academic papers. Supports boolean operators (AND, OR, NOT), phrase search with quotes.

Input Schema

TableJSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| params | Yes | | |

Implementation Reference

  • The handler function search_papers decorated with @mcp.tool(name="semantic_scholar_search_papers"). Implements the core logic: builds API params from input, calls Semantic Scholar paper/search endpoint via _make_request, formats results as markdown or JSON.
    @mcp.tool(name="semantic_scholar_search_papers")
    async def search_papers(params: PaperSearchInput) -> str:
        """Search for academic papers. Supports boolean operators (AND, OR, NOT), phrase search with quotes."""
        # NOTE: the docstring above is kept verbatim on purpose; it appears to be
        # what the MCP tool decorator publishes as the tool's instructions.
        #
        # Flow: build the query-string parameters from the validated input, call
        # the "paper/search" endpoint through _make_request, then render the
        # response either as JSON or as a Markdown report.
        #
        # Returns a JSON document (query/total/papers) when
        # params.response_format is ResponseFormat.JSON, otherwise Markdown text.
        # Errors raised by _make_request propagate to the caller unchanged.
        logger.info("Searching: %s", params.query)

        api_params = {
            "query": params.query,
            "offset": params.offset,
            "limit": params.limit,
            "fields": ",".join(PAPER_FIELDS),
        }
        # Optional filters are only sent when the caller supplied a value.
        if params.year:
            api_params["year"] = params.year
        if params.fields_of_study:
            api_params["fieldsOfStudy"] = ",".join(params.fields_of_study)
        if params.publication_types:
            api_params["publicationTypes"] = ",".join(params.publication_types)
        if params.open_access_only:
            # Sent with an empty value; presumably the API treats the mere
            # presence of this parameter as the filter (confirm against API docs).
            api_params["openAccessPdf"] = ""
        if params.min_citation_count:
            api_params["minCitationCount"] = params.min_citation_count

        response = await _make_request("GET", "paper/search", params=api_params)
        total = response.get("total", 0)
        papers = response.get("data", [])

        if params.response_format == ResponseFormat.JSON:
            return json.dumps({"query": params.query, "total": total, "papers": papers}, indent=2)

        shown_end = params.offset + len(papers)
        if papers:
            shown = f"showing {params.offset + 1}-{shown_end}"
        else:
            # An empty page used to render a nonsensical range such as "1-0".
            shown = f"none shown at offset {params.offset}"

        lines = [
            f"## Search Results: \"{params.query}\"",
            f"**Found:** {total} papers ({shown})",
            "",
        ]
        lines.extend(_format_paper_markdown(paper) for paper in papers)
        if total > shown_end:
            lines.append(f"*Use offset={params.offset + params.limit} to see more results*")
        return "\n".join(lines)
  • Pydantic BaseModel defining the input schema for the tool, including query, filters, pagination, and output format options.
    # Input schema for the paper search tool: the query text, optional filters,
    # pagination controls and the output format.
    # Documented with comments rather than a class docstring so the JSON schema
    # generated from this model stays exactly as it was.
    class PaperSearchInput(BaseModel):
        # Strip surrounding whitespace from strings; reject unknown fields.
        model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

        # --- query ---
        query: str = Field(
            ...,
            min_length=1,
            max_length=500,
            description="Search query",
        )

        # --- optional filters ---
        year: Optional[str] = Field(
            default=None,
            description="Year filter: '2024', '2020-2024', '2020-'",
        )
        fields_of_study: Optional[List[str]] = Field(
            default=None,
            description="Filter by fields: ['Computer Science', 'Biology']",
        )
        publication_types: Optional[List[str]] = Field(
            default=None,
            description="Filter: 'Review', 'JournalArticle'",
        )
        open_access_only: bool = Field(
            default=False,
            description="Only return open access papers",
        )
        min_citation_count: Optional[int] = Field(
            default=None,
            ge=0,
            description="Minimum citations",
        )

        # --- pagination ---
        limit: int = Field(
            default=10,
            ge=1,
            le=100,
            description="Max results (1-100)",
        )
        offset: int = Field(
            default=0,
            ge=0,
            description="Pagination offset",
        )

        # --- output ---
        response_format: ResponseFormat = Field(
            default=ResponseFormat.MARKDOWN,
            description="Output format",
        )
  • Helper function that makes HTTP requests to the Semantic Scholar API and handles errors and API-key authentication.
    async def _make_request(
        method: str,
        endpoint: str,
        params: Optional[Dict] = None,
        json_body: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """Send one HTTP request to the Semantic Scholar API and return its JSON.

        Args:
            method: "GET" issues a GET request; any other value issues a POST.
            endpoint: Path appended to SEMANTIC_SCHOLAR_API_BASE.
            params: Optional query-string parameters.
            json_body: Optional JSON payload, sent only with POST requests.

        Returns:
            The decoded JSON body of a successful response. An empty dict is
            returned only if _handle_error returns instead of raising
            (presumably it always raises; verify against its definition).

        Raises:
            Exception: with the message "Request timed out" when the request
                times out. Whatever _handle_error raises for HTTP error
                statuses also propagates.
        """
        target = f"{SEMANTIC_SCHOLAR_API_BASE}/{endpoint}"
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as session:
            try:
                if method != "GET":
                    reply = await session.post(
                        target,
                        params=params,
                        json=json_body,
                        headers=_get_headers(),
                    )
                else:
                    reply = await session.get(
                        target,
                        params=params,
                        headers=_get_headers(),
                    )
                # Turn 4xx/5xx responses into httpx.HTTPStatusError.
                reply.raise_for_status()
                return reply.json()
            except httpx.HTTPStatusError as status_err:
                _handle_error(status_err.response.status_code)
            except httpx.TimeoutException:
                raise Exception("Request timed out")
        return {}
  • Helper function to format individual paper data into a readable Markdown string, used in the tool's output.
    def _format_paper_markdown(paper: Dict[str, Any]) -> str: lines = [] title = paper.get("title", "Unknown Title") year = paper.get("year", "N/A") lines.append(f"### {title} ({year})") authors = paper.get("authors", []) if authors: names = [a.get("name", "?") for a in authors[:5]] if len(authors) > 5: names.append(f"... +{len(authors)-5} more") lines.append(f"**Authors:** {', '.join(names)}") venue = paper.get("venue") or (paper.get("publicationVenue") or {}).get("name") if venue: lines.append(f"**Venue:** {venue}") citations = paper.get("citationCount", 0) influential = paper.get("influentialCitationCount", 0) lines.append(f"**Citations:** {citations} ({influential} influential)") pdf_info = paper.get("openAccessPdf") or {} if pdf_info.get("url"): lines.append(f"**Open Access:** [PDF]({pdf_info['url']})") fields = paper.get("fieldsOfStudy") or [] if fields: lines.append(f"**Fields:** {', '.join(fields[:5])}") tldr = paper.get("tldr") or {} if tldr.get("text"): lines.append(f"**TL;DR:** {tldr['text']}") abstract = paper.get("abstract") if abstract: lines.append(f"**Abstract:** {abstract[:500]}..." if len(abstract) > 500 else f"**Abstract:** {abstract}") ext_ids = paper.get("externalIds") or {} ids = [] if ext_ids.get("DOI"): ids.append(f"DOI: {ext_ids['DOI']}") if ext_ids.get("ArXiv"): ids.append(f"ArXiv: {ext_ids['ArXiv']}") if ext_ids.get("PubMed"): ids.append(f"PMID: {ext_ids['PubMed']}") if ids: lines.append(f"**IDs:** {', '.join(ids)}") if paper.get("url"): lines.append(f"**Link:** [{paper.get('paperId')}]({paper['url']})") lines.append("") return "\n".join(lines)
  • Constant list of fields requested from the Semantic Scholar API for comprehensive paper metadata.
    # Paper attributes requested from the API on every call; the list is
    # comma-joined into the "fields" query parameter. Order is kept as-is.
    PAPER_FIELDS: List[str] = [
        # identity
        "paperId",
        "corpusId",
        "url",
        # core bibliographic data
        "title",
        "abstract",
        "venue",
        "publicationVenue",
        "year",
        # citation metrics
        "referenceCount",
        "citationCount",
        "influentialCitationCount",
        # open-access information
        "isOpenAccess",
        "openAccessPdf",
        # classification and publication details
        "fieldsOfStudy",
        "s2FieldsOfStudy",
        "publicationTypes",
        "publicationDate",
        "journal",
        "citationStyles",
        # people, external identifiers, summary
        "authors",
        "externalIds",
        "tldr",
    ]

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/smaniches/semantic-scholar-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.