Skip to main content
Glama
by fegizii
server.py (25.1 kB)
"""Semantic Scholar MCP Server.""" import os import re from pathlib import Path from typing import Any, Dict, List, Optional from urllib.parse import quote import httpx from mcp.server.fastmcp import FastMCP # Initialize MCP server mcp = FastMCP("semantic-scholar") # Constants BASE_URL = "https://api.semanticscholar.org/graph/v1" API_TIMEOUT = 30.0 USER_AGENT_VERSION = "1.0" # Get API key from environment variable API_KEY = os.getenv("SEMANTIC_SCHOLAR_API_KEY") async def make_api_request( endpoint: str, params: Optional[Dict[str, Any]] = None, method: str = "GET" ) -> Optional[Dict[str, Any]]: """Make a request to the Semantic Scholar API.""" url = f"{BASE_URL}/{endpoint.lstrip('/')}" headers = { "Accept": "application/json", "User-Agent": f"semantic-scholar-mcp/{USER_AGENT_VERSION}", } if API_KEY: headers["x-api-key"] = API_KEY try: async with httpx.AsyncClient(timeout=API_TIMEOUT) as client: if method == "GET": response = await client.get(url, headers=headers, params=params) elif method == "POST": response = await client.post(url, headers=headers, json=params) else: raise ValueError(f"Unsupported HTTP method: {method}") response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: if e.response.status_code == 403: if not API_KEY: return { "error": "Rate limit exceeded. The shared public rate limit (1000 req/sec) may be exceeded. Get a free API key from https://www.semanticscholar.org/product/api for dedicated limits." } else: return { "error": f"API key may be invalid or rate limit exceeded: {str(e)}" } elif e.response.status_code == 429: return { "error": "Rate limit exceeded. Please wait a moment and try again, or get an API key for dedicated higher limits." 
} else: return {"error": f"HTTP error: {str(e)}"} except httpx.HTTPError as e: return {"error": f"HTTP error: {str(e)}"} except Exception as e: return {"error": f"Request failed: {str(e)}"} def format_paper(paper: Dict[str, Any]) -> str: """Format a paper for display.""" title = paper.get("title", "Unknown Title") authors = paper.get("authors", []) author_names = [author.get("name", "Unknown") for author in authors[:3]] author_str = ", ".join(author_names) if len(authors) > 3: author_str += f" (and {len(authors) - 3} others)" year = paper.get("year") year_str = f" ({year})" if year else "" venue = paper.get("venue", "") venue_str = f" - {venue}" if venue else "" citation_count = paper.get("citationCount", 0) paper_id = paper.get("paperId", "") return f"Title: {title}\nAuthors: {author_str}{year_str}{venue_str}\nCitations: {citation_count}\nPaper ID: {paper_id}" def format_author(author: Dict[str, Any]) -> str: """Format an author for display.""" name = author.get("name", "Unknown Name") author_id = author.get("authorId", "") paper_count = author.get("paperCount", 0) citation_count = author.get("citationCount", 0) h_index = author.get("hIndex", 0) return f"Name: {name}\nAuthor ID: {author_id}\nPapers: {paper_count}\nCitations: {citation_count}\nH-Index: {h_index}" @mcp.tool() async def search_papers( query: str, limit: int = 10, offset: int = 0, fields: Optional[str] = None, publication_types: Optional[str] = None, open_access_pdf: Optional[bool] = None, min_citation_count: Optional[int] = None, year: Optional[str] = None, venue: Optional[str] = None, ) -> str: """ Search for academic papers using Semantic Scholar. 
Args: query: Search query string limit: Maximum number of results (default: 10, max: 100) offset: Number of results to skip (default: 0) fields: Comma-separated list of fields to return publication_types: Filter by publication types open_access_pdf: Filter for papers with open access PDFs min_citation_count: Minimum citation count year: Publication year or year range (e.g., "2020-2023") venue: Publication venue Returns: Formatted search results """ params = {"query": query, "limit": min(limit, 100), "offset": offset} if fields: params["fields"] = fields else: params["fields"] = "paperId,title,authors,year,venue,citationCount,abstract" if publication_types: params["publicationTypes"] = publication_types if open_access_pdf is not None: params["openAccessPdf"] = str(open_access_pdf).lower() if min_citation_count is not None: params["minCitationCount"] = min_citation_count if year: params["year"] = year if venue: params["venue"] = venue result = await make_api_request("paper/search", params) if result is None: return "Error: Failed to fetch results" if "error" in result: return f"Error: {result['error']}" papers = result.get("data", []) total = result.get("total", 0) if not papers: return "No papers found matching your query." formatted_papers = [] for i, paper in enumerate(papers, 1): formatted_papers.append(f"{i}. {format_paper(paper)}") result_text = f"Found {total} total papers (showing {len(papers)}):\n\n" result_text += "\n\n".join(formatted_papers) return result_text @mcp.tool() async def get_paper(paper_id: str, fields: Optional[str] = None) -> str: """ Get detailed information about a specific paper. Args: paper_id: Paper ID (can be Semantic Scholar ID, DOI, ArXiv ID, etc.) 
fields: Comma-separated list of fields to return Returns: Detailed paper information """ params = {} if fields: params["fields"] = fields else: params["fields"] = ( "paperId,title,authors,year,venue,citationCount,abstract,references,citations,openAccessPdf" ) # URL encode the paper ID to handle DOIs and other special characters encoded_id = quote(paper_id, safe="") result = await make_api_request(f"paper/{encoded_id}", params) if result is None: return "Error: Failed to fetch paper" if "error" in result: return f"Error: {result['error']}" paper = result title = paper.get("title", "Unknown Title") authors = paper.get("authors", []) author_names = [author.get("name", "Unknown") for author in authors] year = paper.get("year", "Unknown") venue = paper.get("venue", "Unknown") citation_count = paper.get("citationCount", 0) abstract = paper.get("abstract", "No abstract available") references = paper.get("references", []) citations = paper.get("citations", []) open_access = paper.get("openAccessPdf") pdf_url = open_access.get("url") if open_access else "No open access PDF" result_text = f"""Title: {title} Authors: {', '.join(author_names)} Year: {year} Venue: {venue} Citations: {citation_count} Paper ID: {paper.get('paperId', 'Unknown')} Abstract: {abstract} References: {len(references)} Cited by: {len(citations)} Open Access PDF: {pdf_url}""" return result_text @mcp.tool() async def get_paper_batch(paper_ids: str, fields: Optional[str] = None) -> str: """ Get information for multiple papers in a single request. 
Args: paper_ids: Comma-separated list of paper IDs fields: Comma-separated list of fields to return Returns: Batch paper information """ id_list = [id.strip() for id in paper_ids.split(",")] params: Dict[str, Any] = {"ids": id_list} if fields: params["fields"] = fields else: params["fields"] = "paperId,title,authors,year,venue,citationCount,abstract" result = await make_api_request("paper/batch", params, method="POST") if result is None: return "Error: Failed to fetch papers" if "error" in result: return f"Error: {result['error']}" papers = result if isinstance(result, list) else result.get("data", []) if not papers: return "No papers found for the provided IDs." formatted_papers = [] for i, paper in enumerate(papers, 1): if paper is None: formatted_papers.append(f"{i}. Paper not found") elif isinstance(paper, dict): formatted_papers.append(f"{i}. {format_paper(paper)}") else: formatted_papers.append(f"{i}. Invalid paper data") result_text = f"Retrieved {len(papers)} papers:\n\n" result_text += "\n\n".join(formatted_papers) return result_text @mcp.tool() async def search_authors( query: str, limit: int = 10, offset: int = 0, fields: Optional[str] = None ) -> str: """ Search for authors by name. Args: query: Author name or search query limit: Maximum number of results (default: 10, max: 1000) offset: Number of results to skip (default: 0) fields: Comma-separated list of fields to return Returns: Formatted author search results """ params = {"query": query, "limit": min(limit, 1000), "offset": offset} if fields: params["fields"] = fields else: params["fields"] = "authorId,name,paperCount,citationCount,hIndex" result = await make_api_request("author/search", params) if result is None: return "Error: Failed to fetch authors" if "error" in result: return f"Error: {result['error']}" authors = result.get("data", []) total = result.get("total", 0) if not authors: return "No authors found matching your query." 
formatted_authors = [] for i, author in enumerate(authors, 1): formatted_authors.append(f"{i}. {format_author(author)}") result_text = f"Found {total} total authors (showing {len(authors)}):\n\n" result_text += "\n\n".join(formatted_authors) return result_text @mcp.tool() async def get_author(author_id: str, fields: Optional[str] = None) -> str: """ Get detailed information about a specific author. Args: author_id: Author ID fields: Comma-separated list of fields to return Returns: Detailed author information """ params = {} if fields: params["fields"] = fields else: params["fields"] = "authorId,name,paperCount,citationCount,hIndex,papers" result = await make_api_request(f"author/{author_id}", params) if result is None: return "Error: Failed to fetch author" if "error" in result: return f"Error: {result['error']}" author = result name = author.get("name", "Unknown Name") author_id = author.get("authorId", "") paper_count = author.get("paperCount", 0) citation_count = author.get("citationCount", 0) h_index = author.get("hIndex", 0) papers = author.get("papers", []) result_text = f"""Name: {name} Author ID: {author_id} Total Papers: {paper_count} Total Citations: {citation_count} H-Index: {h_index} Recent Papers ({len(papers)} shown):""" if papers: for i, paper in enumerate(papers[:10], 1): title = paper.get("title", "Unknown Title") year = paper.get("year", "Unknown") citations = paper.get("citationCount", 0) result_text += f"\n{i}. {title} ({year}) - {citations} citations" return result_text @mcp.tool() async def search_snippets(query: str, limit: int = 10, offset: int = 0) -> str: """ Search for text snippets across academic papers. 
Args: query: Search query for text snippets limit: Maximum number of results (default: 10, max: 100) offset: Number of results to skip (default: 0) Returns: Text snippets from papers """ params = {"query": query, "limit": min(limit, 100), "offset": offset} result = await make_api_request("snippet/search", params) if result is None: return "Error: Failed to fetch snippets" if "error" in result: return f"Error: {result['error']}" snippets = result.get("data", []) total = result.get("total", 0) if not snippets: return "No snippets found matching your query." formatted_snippets = [] for i, snippet in enumerate(snippets, 1): paper = snippet.get("paper", {}) title = paper.get("title", "Unknown Title") year = paper.get("year", "Unknown") text = snippet.get("text", "No text available") formatted_snippets.append(f"{i}. From: {title} ({year})\nSnippet: {text}") result_text = f"Found {total} total snippets (showing {len(snippets)}):\n\n" result_text += "\n\n".join(formatted_snippets) return result_text @mcp.tool() async def get_paper_citations( paper_id: str, limit: int = 10, offset: int = 0, fields: Optional[str] = None ) -> str: """ Get papers that cite a specific paper. Args: paper_id: Paper ID to get citations for limit: Maximum number of results (default: 10, max: 1000) offset: Number of results to skip (default: 0) fields: Comma-separated list of fields to return Returns: List of citing papers """ params: Dict[str, Any] = {"limit": min(limit, 1000), "offset": offset} if fields: params["fields"] = fields else: params["fields"] = "paperId,title,authors,year,venue,citationCount" encoded_id = quote(paper_id, safe="") result = await make_api_request(f"paper/{encoded_id}/citations", params) if result is None: return "Error: Failed to fetch citations" if "error" in result: return f"Error: {result['error']}" citations = result.get("data", []) total = result.get("total", 0) if not citations: return "No citations found for this paper." 
formatted_citations = [] for i, citation in enumerate(citations, 1): citing_paper = citation.get("citingPaper", {}) if citing_paper: formatted_citations.append(f"{i}. {format_paper(citing_paper)}") result_text = ( f"Found {total} total citations (showing {len(formatted_citations)}):\n\n" ) result_text += "\n\n".join(formatted_citations) return result_text @mcp.tool() async def get_paper_references( paper_id: str, limit: int = 10, offset: int = 0, fields: Optional[str] = None ) -> str: """ Get papers referenced by a specific paper. Args: paper_id: Paper ID to get references for limit: Maximum number of results (default: 10, max: 1000) offset: Number of results to skip (default: 0) fields: Comma-separated list of fields to return Returns: List of referenced papers """ params: Dict[str, Any] = {"limit": min(limit, 1000), "offset": offset} if fields: params["fields"] = fields else: params["fields"] = "paperId,title,authors,year,venue,citationCount" encoded_id = quote(paper_id, safe="") result = await make_api_request(f"paper/{encoded_id}/references", params) if result is None: return "Error: Failed to fetch references" if "error" in result: return f"Error: {result['error']}" references = result.get("data", []) total = result.get("total", 0) if not references: return "No references found for this paper." formatted_references = [] for i, reference in enumerate(references, 1): cited_paper = reference.get("citedPaper", {}) if cited_paper: formatted_references.append(f"{i}. {format_paper(cited_paper)}") result_text = ( f"Found {total} total references (showing {len(formatted_references)}):\n\n" ) result_text += "\n\n".join(formatted_references) return result_text @mcp.tool() async def get_citation_context(paper_id: str, citing_paper_id: str) -> str: """ Get the context in which one paper cites another. 
Args: paper_id: ID of the paper being cited citing_paper_id: ID of the paper doing the citing Returns: Citation context information """ encoded_paper_id = quote(paper_id, safe="") encoded_citing_id = quote(citing_paper_id, safe="") result = await make_api_request( f"paper/{encoded_paper_id}/citations/{encoded_citing_id}" ) if result is None: return "Error: Failed to fetch citation context" if "error" in result: return f"Error: {result['error']}" contexts = result.get("contexts", []) citing_paper = result.get("citingPaper", {}) cited_paper = result.get("citedPaper", {}) if not contexts: return "No citation context found." result_text = "Citation context:\n\n" result_text += f"Cited paper: {cited_paper.get('title', 'Unknown')}\n" result_text += f"Citing paper: {citing_paper.get('title', 'Unknown')}\n\n" for i, context in enumerate(contexts, 1): result_text += f"{i}. {context}\n" return result_text def create_safe_filename(title: str, max_length: int = 100) -> str: """Create a safe filename from paper title.""" # Remove/replace problematic characters safe_title = re.sub(r'[<>:"/\\|?*]', "", title) # Remove forbidden chars safe_title = re.sub(r"\s+", " ", safe_title) # Normalize whitespace safe_title = safe_title.strip() # Limit length if len(safe_title) > max_length: safe_title = safe_title[:max_length].rsplit(" ", 1)[0] # Break at word boundary return safe_title if safe_title else "Unknown_Paper" def set_pdf_metadata( file_path: Path, title: str, authors: List[Dict], year: Optional[int] ): """Set PDF metadata using PyPDF2 if available.""" try: from PyPDF2 import PdfReader, PdfWriter # Read the existing PDF with open(file_path, "rb") as f: reader = PdfReader(f) writer = PdfWriter() # Copy all pages for page in reader.pages: writer.add_page(page) # Create author string author_names = [ author.get("name", "") for author in authors if author.get("name") ] author_str = ", ".join(author_names[:5]) # Limit to first 5 authors if len(authors) > 5: author_str += " et al." 
# Set metadata metadata = { "/Title": title, "/Author": author_str, "/Creator": "Semantic Scholar MCP", "/Producer": "Semantic Scholar MCP", } if year: metadata["/CreationDate"] = f"D:{year}0101000000Z" writer.add_metadata(metadata) # Write back to file with open(file_path, "wb") as output_f: writer.write(output_f) return True except ImportError: # PyPDF2 not available - skip metadata setting return False except Exception as e: # Error setting metadata - file is still saved print(f"Warning: Could not set PDF metadata: {e}") return False @mcp.tool() async def download_paper_pdf(paper_id: str, download_path: Optional[str] = None) -> str: """ Download the PDF of a paper if available, using title as filename and setting metadata. Args: paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.) download_path: Directory to save the PDF (default: ~/Downloads/semantic_scholar_papers) Returns: Status message with download location or error """ # Get paper info including title, authors, year, and PDF URL paper_result = await make_api_request( f"paper/{quote(paper_id, safe='')}", {"fields": "paperId,title,authors,year,openAccessPdf"}, ) if paper_result is None: return "Error: Failed to fetch paper information" if "error" in paper_result: return f"Error: {paper_result['error']}" # Check if PDF is available open_access = paper_result.get("openAccessPdf") if not open_access or not open_access.get("url"): return "Error: No open access PDF available for this paper" pdf_url = open_access["url"] title = paper_result.get("title", "Unknown Paper") authors = paper_result.get("authors", []) year = paper_result.get("year") # paper_id from API response _ = paper_result.get("paperId", paper_id) # Set up download path if download_path is None: download_dir = Path.home() / "Downloads" / "semantic_scholar_papers" else: download_dir = Path(download_path) # Create directory if it doesn't exist download_dir.mkdir(parents=True, exist_ok=True) # Create filename from title safe_title = 
create_safe_filename(title) year_str = f" ({year})" if year else "" filename = f"{safe_title}{year_str}.pdf" file_path = download_dir / filename # Handle duplicate filenames counter = 1 original_file_path = file_path while file_path.exists(): stem = original_file_path.stem suffix = original_file_path.suffix file_path = original_file_path.parent / f"{stem} ({counter}){suffix}" counter += 1 try: async with httpx.AsyncClient(timeout=60.0) as client: headers = {"User-Agent": "semantic-scholar-mcp/1.0"} response = await client.get(pdf_url, headers=headers, follow_redirects=True) response.raise_for_status() # Check if it's actually a PDF content_type = response.headers.get("content-type", "") if "pdf" not in content_type.lower() and not pdf_url.lower().endswith( ".pdf" ): return f"Warning: Downloaded file may not be a PDF (Content-Type: {content_type})" # Write the PDF file with open(file_path, "wb") as f: f.write(response.content) file_size = len(response.content) / (1024 * 1024) # MB # Set PDF metadata metadata_set = set_pdf_metadata(file_path, title, authors, year) # Create author summary for output author_names = [author.get("name", "") for author in authors[:3]] author_summary = ", ".join(author_names) if len(authors) > 3: author_summary += f" and {len(authors) - 3} others" result = "✅ PDF downloaded successfully!\n\n" result += f"Title: {title}\n" result += f"Authors: {author_summary}\n" if year: result += f"Year: {year}\n" result += f"Saved to: {file_path}\n" result += f"File size: {file_size:.2f} MB\n" if metadata_set: result += "✅ PDF metadata set with title, authors, and year" else: result += "⚠️ PDF saved but metadata not set (install PyPDF2 for metadata support)" return result except httpx.HTTPError as e: return f"Error downloading PDF: {str(e)}" except Exception as e: return f"Error saving PDF: {str(e)}" @mcp.tool() async def get_paper_pdf_info(paper_id: str) -> str: """ Get PDF availability information for a paper. 
Args: paper_id: Paper ID (Semantic Scholar ID, DOI, ArXiv ID, etc.) Returns: PDF availability information """ encoded_id = quote(paper_id, safe="") result = await make_api_request( f"paper/{encoded_id}", {"fields": "paperId,title,openAccessPdf,externalIds"} ) if result is None: return "Error: Failed to fetch paper information" if "error" in result: return f"Error: {result['error']}" title = result.get("title", "Unknown Title") open_access = result.get("openAccessPdf") external_ids = result.get("externalIds", {}) result_text = f"PDF Information for: {title}\n\n" if open_access and open_access.get("url"): pdf_url = open_access["url"] result_text += "✅ Open Access PDF Available\n" result_text += f"URL: {pdf_url}\n" result_text += "Status: Ready for download\n\n" else: result_text += "❌ No Open Access PDF Available\n\n" # Check for potential alternative sources result_text += "Alternative sources to check:\n" if external_ids.get("ArXiv"): result_text += f"- ArXiv: https://arxiv.org/abs/{external_ids['ArXiv']}\n" if external_ids.get("DOI"): result_text += f"- Publisher (DOI): https://doi.org/{external_ids['DOI']}\n" if external_ids.get("PubMed"): result_text += ( f"- PubMed: https://pubmed.ncbi.nlm.nih.gov/{external_ids['PubMed']}/\n" ) return result_text def main() -> None: """Run the MCP server.""" mcp.run(transport="stdio") if __name__ == "__main__": main()

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/fegizii/SemanticScholarMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.