semantic_scholar_search_papers
Search academic papers using Semantic Scholar's database with boolean operators, year filters, field restrictions, and citation thresholds to find relevant research.
Instructions
Search for academic papers. Supports boolean operators (AND, OR, NOT), phrase search with quotes.
Input Schema
JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| params | Yes | Paper search parameters: query, optional filters (year, fields of study, publication types, open access, minimum citations), pagination, and response format. | — |
Implementation Reference
# The handler function search_papers, decorated with
# @mcp.tool(name="semantic_scholar_search_papers"). Implements the core logic:
# builds API params from the validated input, calls the Semantic Scholar
# paper/search endpoint via _make_request, and formats results as Markdown or JSON.
@mcp.tool(name="semantic_scholar_search_papers")
async def search_papers(params: PaperSearchInput) -> str:
    """Search for academic papers. Supports boolean operators (AND, OR, NOT), phrase search with quotes."""
    logger.info(f"Searching: {params.query}")
    api_params = {
        "query": params.query,
        "offset": params.offset,
        "limit": params.limit,
        "fields": ",".join(PAPER_FIELDS),
    }
    # Optional filters are only forwarded when the caller supplied them.
    if params.year:
        api_params["year"] = params.year
    if params.fields_of_study:
        api_params["fieldsOfStudy"] = ",".join(params.fields_of_study)
    if params.publication_types:
        api_params["publicationTypes"] = ",".join(params.publication_types)
    if params.open_access_only:
        # The mere presence of the (empty) openAccessPdf parameter restricts
        # results to open-access papers.
        api_params["openAccessPdf"] = ""
    if params.min_citation_count is not None:
        # `is not None` rather than truthiness: the schema allows ge=0, and an
        # explicit threshold of 0 would otherwise be silently dropped.
        api_params["minCitationCount"] = params.min_citation_count
    response = await _make_request("GET", "paper/search", params=api_params)
    total, papers = response.get("total", 0), response.get("data", [])
    if params.response_format == ResponseFormat.JSON:
        return json.dumps({"query": params.query, "total": total, "papers": papers}, indent=2)
    lines = [f"## Search Results: \"{params.query}\""]
    if papers:
        lines.append(f"**Found:** {total} papers (showing {params.offset + 1}-{params.offset + len(papers)})")
    else:
        # Avoid the nonsensical "showing 1-0" range when nothing matched.
        lines.append(f"**Found:** {total} papers")
    lines.append("")
    for paper in papers:
        lines.append(_format_paper_markdown(paper))
    if total > params.offset + len(papers):
        lines.append(f"*Use offset={params.offset + params.limit} to see more results*")
    return "\n".join(lines)
# Pydantic BaseModel defining the input schema for the tool, including query,
# filters, pagination, and output format options.
class PaperSearchInput(BaseModel):
    # Strip surrounding whitespace from string fields; reject unknown keys.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
    # Full-text search query (boolean operators and quoted phrases supported).
    query: str = Field(..., description="Search query", min_length=1, max_length=500)
    # Year filter in Semantic Scholar's range syntax, e.g. '2024', '2020-2024', '2020-'.
    year: Optional[str] = Field(default=None, description="Year filter: '2024', '2020-2024', '2020-'")
    # Restrict results to specific fields of study.
    fields_of_study: Optional[List[str]] = Field(default=None, description="Filter by fields: ['Computer Science', 'Biology']")
    # Restrict results to specific publication types.
    publication_types: Optional[List[str]] = Field(default=None, description="Filter: 'Review', 'JournalArticle'")
    # When True, only papers with an open-access PDF are returned.
    open_access_only: bool = Field(default=False, description="Only return open access papers")
    # Minimum citation count threshold (non-negative).
    min_citation_count: Optional[int] = Field(default=None, description="Minimum citations", ge=0)
    # Pagination: page size (1-100) and zero-based offset.
    limit: int = Field(default=10, description="Max results (1-100)", ge=1, le=100)
    offset: int = Field(default=0, description="Pagination offset", ge=0)
    # Output rendering: Markdown (default) or raw JSON.
    response_format: ResponseFormat = Field(default=ResponseFormat.MARKDOWN, description="Output format")
# Helper function to make HTTP requests to the Semantic Scholar API; handles
# errors and authentication via API key (through _get_headers / _handle_error).
async def _make_request(
    method: str,
    endpoint: str,
    params: Optional[Dict] = None,
    json_body: Optional[Dict] = None,
) -> Dict[str, Any]:
    url = f"{SEMANTIC_SCHOLAR_API_BASE}/{endpoint}"
    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        try:
            if method == "GET":
                response = await client.get(url, params=params, headers=_get_headers())
            else:
                # Any non-GET method is issued as a POST with a JSON body.
                response = await client.post(url, params=params, json=json_body, headers=_get_headers())
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            # Delegate status-code handling (auth, rate limits, ...) to the shared handler.
            _handle_error(exc.response.status_code)
        except httpx.TimeoutException:
            raise Exception("Request timed out")
        else:
            return response.json()
    # Reached only if _handle_error returned without raising.
    return {}
- Helper function to format individual paper data into a readable Markdown string, used in the tool's output.def _format_paper_markdown(paper: Dict[str, Any]) -> str: lines = [] title = paper.get("title", "Unknown Title") year = paper.get("year", "N/A") lines.append(f"### {title} ({year})") authors = paper.get("authors", []) if authors: names = [a.get("name", "?") for a in authors[:5]] if len(authors) > 5: names.append(f"... +{len(authors)-5} more") lines.append(f"**Authors:** {', '.join(names)}") venue = paper.get("venue") or (paper.get("publicationVenue") or {}).get("name") if venue: lines.append(f"**Venue:** {venue}") citations = paper.get("citationCount", 0) influential = paper.get("influentialCitationCount", 0) lines.append(f"**Citations:** {citations} ({influential} influential)") pdf_info = paper.get("openAccessPdf") or {} if pdf_info.get("url"): lines.append(f"**Open Access:** [PDF]({pdf_info['url']})") fields = paper.get("fieldsOfStudy") or [] if fields: lines.append(f"**Fields:** {', '.join(fields[:5])}") tldr = paper.get("tldr") or {} if tldr.get("text"): lines.append(f"**TL;DR:** {tldr['text']}") abstract = paper.get("abstract") if abstract: lines.append(f"**Abstract:** {abstract[:500]}..." if len(abstract) > 500 else f"**Abstract:** {abstract}") ext_ids = paper.get("externalIds") or {} ids = [] if ext_ids.get("DOI"): ids.append(f"DOI: {ext_ids['DOI']}") if ext_ids.get("ArXiv"): ids.append(f"ArXiv: {ext_ids['ArXiv']}") if ext_ids.get("PubMed"): ids.append(f"PMID: {ext_ids['PubMed']}") if ids: lines.append(f"**IDs:** {', '.join(ids)}") if paper.get("url"): lines.append(f"**Link:** [{paper.get('paperId')}]({paper['url']})") lines.append("") return "\n".join(lines)
# Constant list of fields requested from the Semantic Scholar API for
# comprehensive paper metadata: identifiers, bibliographic data, citation
# statistics, open-access info, and the machine-generated TL;DR summary.
PAPER_FIELDS: List[str] = [
    "paperId", "corpusId", "url", "title", "abstract", "venue",
    "publicationVenue", "year", "referenceCount", "citationCount",
    "influentialCitationCount", "isOpenAccess", "openAccessPdf",
    "fieldsOfStudy", "s2FieldsOfStudy", "publicationTypes",
    "publicationDate", "journal", "citationStyles", "authors",
    "externalIds", "tldr"
]