# Semantic Scholar MCP Server
# by YUZongmin
"""
Paper-related API endpoints for the Semantic Scholar API.
"""
from typing import Dict, List, Optional
from fastmcp import Context
import httpx
# Import mcp from centralized location instead of server
from ..mcp import mcp
from ..config import PaperFields, CitationReferenceFields, AuthorDetailFields, Config, ErrorType
from ..utils.http import make_request, get_api_key
from ..utils.errors import create_error_response
@mcp.tool()
async def paper_relevance_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 10
) -> Dict:
    """
    Search for papers on Semantic Scholar using relevance-based ranking.

    This endpoint is optimized for finding the most relevant papers matching a text query.
    Results are sorted by relevance score.

    Args:
        query (str): A text query to search for. The query will be matched against paper titles,
            abstracts, venue names, and author names. Must not be empty or whitespace-only.
        fields (Optional[List[str]]): List of fields to return for each paper.
            paperId and title are always returned.
            When None, PaperFields.DEFAULT is requested.
        publication_types (Optional[List[str]]): Filter by publication types.
        open_access_pdf (bool): If True, only include papers with a public PDF.
            Default: False
        min_citation_count (Optional[int]): Minimum number of citations required.
        year (Optional[str]): Filter by publication year. Supports several formats:
            - Single year: "2019"
            - Year range: "2016-2020"
            - Since year: "2010-"
            - Until year: "-2015"
        venue (Optional[List[str]]): Filter by publication venues.
            Accepts full venue names or ISO4 abbreviations.
        fields_of_study (Optional[List[str]]): Filter by fields of study.
        offset (int): Number of results to skip for pagination. Must not be negative.
            Default: 0
        limit (int): Maximum number of results to return. Must not be negative.
            Values above 100 are clamped to 100.
            Default: 10
            Maximum: 100

    Returns:
        Dict: {
            "total": int,       # Total number of papers matching the query
            "offset": int,      # Current offset in the results
            "next": int,        # Offset for the next page of results (if available)
            "data": List[Dict]  # List of papers with requested fields
        }
        A validation error response is returned (without contacting the API) when
        the query is blank, a pagination value is negative, or an unknown field
        name is requested.
    """
    if not query.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Negative pagination values can never be valid, so reject them locally
    # instead of forwarding a request that is bound to fail.
    if offset < 0 or limit < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset and limit must be non-negative",
            {"offset": offset, "limit": limit}
        )

    # Validate and prepare fields
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the message is stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    # Oversized limits are clamped rather than rejected.
    params = {
        "query": query,
        "offset": offset,
        "limit": min(limit, 100),
        "fields": ",".join(fields)
    }

    # Add optional filters
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if open_access_pdf:
        params["openAccessPdf"] = "true"
    if min_citation_count is not None:
        params["minCitationCount"] = min_citation_count
    if year:
        params["year"] = year
    if venue:
        params["venue"] = ",".join(venue)
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    return await make_request("/paper/search", params)
@mcp.tool()
async def paper_bulk_search(
    context: Context,
    query: Optional[str] = None,
    token: Optional[str] = None,
    fields: Optional[List[str]] = None,
    sort: Optional[str] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    publication_date_or_year: Optional[str] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Bulk search for papers with advanced filtering and sorting options.

    Intended for retrieving large sets of papers efficiently.

    Args:
        query (Optional[str]): Text query to match against paper title and abstract.
            Supports boolean logic with +, |, -, ", *, (), and ~N.
            A blank or whitespace-only query is treated as not provided.
        token (Optional[str]): Continuation token for pagination
        fields (Optional[List[str]]): Fields to return for each paper
            paperId is always returned
            Default: paperId and title only
        sort (Optional[str]): Sort order in format 'field:order'
            Fields: paperId, publicationDate, citationCount
            Order: asc (default), desc
            Default: 'paperId:asc'
        publication_types (Optional[List[str]]): Filter by publication types
        open_access_pdf (bool): Only include papers with public PDF
        min_citation_count (Optional[int]): Minimum citation threshold
        publication_date_or_year (Optional[str]): Date/year range filter
            Format: <startDate>:<endDate> in YYYY-MM-DD
            Takes precedence over `year` when both are given.
        year (Optional[str]): Publication year filter
            Examples: '2019', '2016-2020', '2010-', '-2015'
            Ignored when `publication_date_or_year` is given.
        venue (Optional[List[str]]): Filter by publication venues
        fields_of_study (Optional[List[str]]): Filter by fields of study

    Returns:
        Dict: {
            'total': int,      # Total matching papers
            'token': str,      # Continuation token for next batch
            'data': List[Dict] # Papers with requested fields
        }
        A validation error response is returned (without contacting the API) for
        unknown fields, a malformed sort expression, unknown publication types,
        a negative citation threshold, or unknown fields of study.
    """
    params = {}

    # Strip before testing so a whitespace-only query is not sent as "".
    cleaned_query = query.strip() if query else ""
    if cleaned_query:
        params["query"] = cleaned_query

    if token:
        params["token"] = token

    if fields:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the message is stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    if sort:
        valid_sort_fields = ["paperId", "publicationDate", "citationCount"]
        valid_sort_orders = ["asc", "desc"]
        # Exactly one ':' is required; anything else is a format error.
        sort_parts = sort.split(":")
        if len(sort_parts) != 2:
            return create_error_response(
                ErrorType.VALIDATION,
                "Sort must be in format 'field:order'"
            )
        sort_field, sort_order = sort_parts
        if sort_field not in valid_sort_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields)}"
            )
        if sort_order not in valid_sort_orders:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}"
            )
        params["sort"] = sort

    if publication_types:
        valid_types = {
            "Review", "JournalArticle", "CaseReport", "ClinicalTrial",
            "Conference", "Dataset", "Editorial", "LettersAndComments",
            "MetaAnalysis", "News", "Study", "Book", "BookSection"
        }
        invalid_types = set(publication_types) - valid_types
        if invalid_types:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid publication types: {', '.join(sorted(invalid_types))}",
                {"valid_types": list(valid_types)}
            )
        params["publicationTypes"] = ",".join(publication_types)

    if open_access_pdf:
        params["openAccessPdf"] = "true"

    if min_citation_count is not None:
        if min_citation_count < 0:
            return create_error_response(
                ErrorType.VALIDATION,
                "Minimum citation count cannot be negative"
            )
        params["minCitationCount"] = str(min_citation_count)

    # The date-or-year filter wins; `year` is only used as a fallback.
    if publication_date_or_year:
        params["publicationDateOrYear"] = publication_date_or_year
    elif year:
        params["year"] = year

    if venue:
        params["venue"] = ",".join(venue)

    if fields_of_study:
        valid_study_fields = {
            "Computer Science", "Medicine", "Chemistry", "Biology",
            "Materials Science", "Physics", "Geology", "Psychology",
            "Art", "History", "Geography", "Sociology", "Business",
            "Political Science", "Economics", "Philosophy", "Mathematics",
            "Engineering", "Environmental Science", "Agricultural and Food Sciences",
            "Education", "Law", "Linguistics"
        }
        invalid_study_fields = set(fields_of_study) - valid_study_fields
        if invalid_study_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields of study: {', '.join(sorted(invalid_study_fields))}",
                {"valid_fields": list(valid_study_fields)}
            )
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    # Error and success payloads are both handed back to the caller unchanged.
    return await make_request("/paper/search/bulk", params)
@mcp.tool()
async def paper_title_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Look up the single paper whose title best matches the query.

    This endpoint is optimized for finding a specific paper by its title and
    returns the best matching paper based on title similarity.

    Args:
        query (str): The title text to search for. The query will be matched against
            paper titles to find the closest match.
        fields (Optional[List[str]]): List of fields to return for the paper.
            paperId and title are always returned.
        publication_types (Optional[List[str]]): Filter by publication types.
        open_access_pdf (bool): If True, only include papers with a public PDF.
            Default: False
        min_citation_count (Optional[int]): Minimum number of citations required.
        year (Optional[str]): Filter by publication year. Supports several formats:
            - Single year: "2019"
            - Year range: "2016-2020"
            - Since year: "2010-"
            - Until year: "-2015"
        venue (Optional[List[str]]): Filter by publication venues.
            Accepts full venue names or ISO4 abbreviations.
        fields_of_study (Optional[List[str]]): Filter by fields of study.

    Returns:
        Dict: {
            "paperId": str,      # Semantic Scholar Paper ID
            "title": str,        # Paper title
            "matchScore": float, # Similarity score between query and matched title
            ...                  # Additional requested fields
        }
        Returns error response if no matching paper is found.
    """
    if len(query.strip()) == 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Fall back to the default field set, otherwise vet the caller's choice.
    if fields is not None:
        unknown = set(fields) - PaperFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )
    else:
        fields = PaperFields.DEFAULT

    # Each optional parameter is paired with its wire value, or None when the
    # caller did not supply it; only the supplied ones reach the request.
    optional_params = [
        ("fields", ",".join(fields) if fields else None),
        ("publicationTypes", ",".join(publication_types) if publication_types else None),
        ("openAccessPdf", "true" if open_access_pdf else None),
        ("minCitationCount", None if min_citation_count is None else str(min_citation_count)),
        ("year", year if year else None),
        ("venue", ",".join(venue) if venue else None),
        ("fieldsOfStudy", ",".join(fields_of_study) if fields_of_study else None),
    ]
    request_params = {"query": query}
    for key, value in optional_params:
        if value is not None:
            request_params[key] = value

    response = await make_request("/paper/search/match", request_params)

    # A 404 from the match endpoint is reported as a "no match" error;
    # every other payload (success or error) is passed through as-is.
    failed = isinstance(response, dict) and "error" in response
    if failed and "404" in response["error"].get("message", ""):
        return create_error_response(
            ErrorType.VALIDATION,
            "No matching paper found",
            {"original_query": query}
        )
    return response
@mcp.tool()
async def paper_details(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get details about a paper using various types of identifiers.

    This endpoint provides comprehensive metadata about a paper.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.
        fields (Optional[List[str]]): List of fields to return.
            paperId is always returned.

    Returns:
        Dict: Paper details with requested fields.
            Always includes paperId.
            Returns error response if paper not found.
    """
    # Normalize once so the value that is validated is the value that is sent;
    # stray whitespace would otherwise end up inside the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    params = {}
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}", params)

    # Turn a 404 into a clearer "not found" error; other payloads (success or
    # error) are returned untouched.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
@mcp.tool()
async def paper_batch_details(
    context: Context,
    paper_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple papers in a single batch request.

    This endpoint is optimized for efficiently retrieving details about known papers.

    Args:
        paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Maximum: 500 IDs per request
        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Whitespace around field names and empty entries are ignored,
            so "title, year" is equivalent to "title,year".

    Returns:
        List[Dict]: List of paper details with requested fields.
            - Results maintain the same order as input paper_ids
            - Invalid or not found paper IDs return null in the results
            - Each paper object contains the requested fields
            - paperId is always included in each paper object
        On failure an error response is returned instead: validation errors for
        bad input, a rate-limit error on HTTP 429, a timeout error when the
        request exceeds Config.TIMEOUT, and an API error otherwise.
    """
    # Validate inputs
    if not paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper IDs list cannot be empty"
        )
    if len(paper_ids) > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 500 paper IDs at once",
            {"max_papers": 500, "received": len(paper_ids)}
        )

    # Trim each name and drop blanks so that "title, year" is not rejected
    # just because of the space after the comma.
    field_names = []
    if fields:
        field_names = [name.strip() for name in fields.split(",")]
        field_names = [name for name in field_names if name]

    invalid_fields = set(field_names) - PaperFields.VALID_FIELDS
    if invalid_fields:
        # Sorted so the message is stable across runs (set order is not).
        return create_error_response(
            ErrorType.VALIDATION,
            f"Invalid fields: {', '.join(sorted(invalid_fields))}",
            {"valid_fields": list(PaperFields.VALID_FIELDS)}
        )

    # Build request parameters from the normalized field names
    params = {}
    if field_names:
        params["fields"] = ",".join(field_names)

    # The batch endpoint takes the IDs in a JSON body, so it is POSTed directly
    # here rather than through the make_request helper.
    try:
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}
            response = await client.post(
                f"{Config.BASE_URL}/paper/batch",
                params=params,
                json={"ids": paper_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Tool boundary: report any other failure as an error payload instead
        # of letting the exception propagate to the MCP client.
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get details about the authors of a paper with pagination support.

    This endpoint provides author information and their contributions.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.
        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.
        offset (int): Number of authors to skip for pagination. Must not be negative.
            Default: 0
        limit (int): Maximum number of authors to return. Must not be negative.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,      # Current offset in the results
            "next": int,        # Next offset (if more results available)
            "data": List[Dict]  # List of authors with requested fields
        }
        A validation error response is returned for a blank paper ID, an
        out-of-range offset/limit, or unknown fields; a "Paper not found"
        error is returned when the API reports a 404.
    """
    # Normalize once so the value that is validated is the value that is sent;
    # stray whitespace would otherwise end up inside the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if offset < 0 or limit < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset and limit must be non-negative",
            {"offset": offset, "limit": limit}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the message is stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/authors", params)

    # Turn a 404 into a clearer "not found" error; other payloads (success or
    # error) are returned untouched.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
@mcp.tool()
async def paper_citations(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers that cite the specified paper (papers where this paper appears in their bibliography).

    This endpoint provides detailed citation information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.
        fields (Optional[List[str]]): List of fields to return for each citing paper.
            paperId is always returned.
        offset (int): Number of citations to skip for pagination. Must not be negative.
            Default: 0
        limit (int): Maximum number of citations to return. Must not be negative.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,      # Current offset in the results
            "next": int,        # Next offset (if more results available)
            "data": List[Dict]  # List of citing papers with requested fields
        }
        A validation error response is returned for a blank paper ID, an
        out-of-range offset/limit, or unknown fields; a "Paper not found"
        error is returned when the API reports a 404.
    """
    # Normalize once so the value that is validated is the value that is sent;
    # stray whitespace would otherwise end up inside the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if offset < 0 or limit < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset and limit must be non-negative",
            {"offset": offset, "limit": limit}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the message is stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/citations", params)

    # Turn a 404 into a clearer "not found" error; other payloads (success or
    # error) are returned untouched.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
@mcp.tool()
async def paper_references(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers cited by the specified paper (papers appearing in this paper's bibliography).

    This endpoint provides detailed reference information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.
        fields (Optional[List[str]]): List of fields to return for each referenced paper.
            paperId is always returned.
        offset (int): Number of references to skip for pagination. Must not be negative.
            Default: 0
        limit (int): Maximum number of references to return. Must not be negative.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,      # Current offset in the results
            "next": int,        # Next offset (if more results available)
            "data": List[Dict]  # List of referenced papers with requested fields
        }
        A validation error response is returned for a blank paper ID, an
        out-of-range offset/limit, or unknown fields; a "Paper not found"
        error is returned when the API reports a 404.
    """
    # Normalize once so the value that is validated is the value that is sent;
    # stray whitespace would otherwise end up inside the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if offset < 0 or limit < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset and limit must be non-negative",
            {"offset": offset, "limit": limit}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the message is stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/references", params)

    # Turn a 404 into a clearer "not found" error; other payloads (success or
    # error) are returned untouched.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result