Academic Paper Search MCP Server
by afrise
Verified
import logging
import sys
import os
from datetime import datetime
from typing import Any
import httpx
from mcp.server.fastmcp import FastMCP
import unicodedata
import json
import sys
# Set UTF-8 as default encoding for Python
sys.stdout.recodeinfo = 'utf-8'
if sys.stdout.encoding != 'utf-8':
sys.stdout.reconfigure(encoding='utf-8')
# Initialize FastMCP server
mcp = FastMCP("scientific_literature")
# Constants
SEMANTIC_SCHOLAR_API = "https://api.semanticscholar.org/graph/v1"
CROSSREF_API = "https://api.crossref.org/works"
USER_AGENT = "scientific-literature-app/1.0"
async def make_api_request(url: str, headers: dict = None, params: dict = None) -> dict[str, Any] | None:
"""Make a request to the API with proper error handling."""
if headers is None:
headers = { "User-Agent": USER_AGENT }
async with httpx.AsyncClient() as client:
try:
response = await client.get(url, headers=headers, params=params, timeout=30.0)
response.raise_for_status()
return response.json()
except Exception as e:
return None
def format_paper_data(data: dict, source: str) -> str:
"""Format paper data from different sources into a consistent string format."""
if not data:
return "No paper data available"
try:
if source == "semantic_scholar":
title = unicodedata.normalize('NFKD', str(data.get('title', 'No title available')))
authors = ', '.join([author.get('name', 'Unknown Author') for author in data.get('authors', [])])
year = data.get('year') or 'Year unknown'
external_ids = data.get('externalIds', {}) or {}
doi = external_ids.get('DOI', 'No DOI available')
venue = data.get('venue') or 'Venue unknown'
abstract = data.get('abstract') or 'No abstract available'
tldr = (data.get('tldr') or {}).get('text', '')
is_open = "Yes" if data.get('isOpenAccess') else "No"
pdf_data = data.get('openAccessPdf', {}) or {}
pdf_url = pdf_data.get('url', 'Not available')
elif source == "crossref":
title = (data.get('title') or ['No title available'])[0]
authors = ', '.join([
f"{author.get('given', '')} {author.get('family', '')}".strip() or 'Unknown Author'
for author in data.get('author', [])
])
year = (data.get('published-print', {}).get('date-parts', [['']])[0][0]) or 'Year unknown'
doi = data.get('DOI') or 'No DOI available'
result = [
f"Title: {title}",
f"Authors: {authors}",
f"Year: {year}",
f"DOI: {doi}"
]
if source == "semantic_scholar":
result.extend([
f"Venue: {venue}",
f"Open Access: {is_open}",
f"PDF URL: {pdf_url}",
f"Abstract: {abstract}"
])
if tldr:
result.append(f"TL;DR: {tldr}")
return "\n".join(result) + "\t\t\n"
except Exception as e:
return f"Error formatting paper data: {str(e)}"
@mcp.tool()
async def search_papers(query: str, limit: int = 10) -> str:
"""Search for papers across multiple sources.
args:
query: the search query
limit: the maximum number of results to return (default 10)
"""
if query == "":
return "Please provide a search query."
# Truncate long queries
MAX_QUERY_LENGTH = 300
if len(query) > MAX_QUERY_LENGTH:
original_length = len(query)
query = query[:MAX_QUERY_LENGTH] + "..."
try:
# Search Semantic Scholar
semantic_url = f"{SEMANTIC_SCHOLAR_API}/paper/search?query={query}&limit={limit}"
semantic_data = await make_api_request(semantic_url)
# Search Crossref
crossref_url = f"{CROSSREF_API}?query={query}&rows={limit}"
crossref_data = await make_api_request(crossref_url)
results = []
if semantic_data and 'papers' in semantic_data:
results.append("=== Semantic Scholar Results ===")
for paper in semantic_data['papers']:
results.append(format_paper_data(paper, "semantic_scholar"))
if crossref_data and 'items' in crossref_data.get('message', {}):
results.append("\n=== Crossref Results ===")
for paper in crossref_data['message']['items']:
results.append(format_paper_data(paper, "crossref"))
if not results:
return "No results found or error occurred while fetching papers."
return "\n".join(results)
except:
return "Error searching papers."
@mcp.tool()
async def fetch_paper_details(paper_id: str, source: str = "semantic_scholar") -> str:
"""Get detailed information about a specific paper.
Args:
paper_id: Paper identifier (DOI for Crossref, paper ID for Semantic Scholar)
source: Source database ("semantic_scholar" or "crossref")
"""
if source == "semantic_scholar":
url = f"{SEMANTIC_SCHOLAR_API}/paper/{paper_id}"
elif source == "crossref":
url = f"{CROSSREF_API}/{paper_id}"
else:
return "Unsupported source. Please use 'semantic_scholar' or 'crossref'."
data = await make_api_request(url)
if not data:
return f"Unable to fetch paper details from {source}."
if source == "crossref":
data = data.get('message', {})
return format_paper_data(data, source)
@mcp.tool()
async def search_by_topic(topic: str, year_start: int = None, year_end: int = None, limit: int = 10) -> str:
"""Search for papers by topic with optional date range.
Note: Query length is limited to 300 characters. Longer queries will be automatically truncated.
Args:
topic (str): Search query (max 300 chars)
year_start (int, optional): Start year for date range
year_end (int, optional): End year for date range
limit (int, optional): Maximum number of results to return (default 10)
Returns:
str: Formatted search results or error message
"""
try:
# Truncate long queries to prevent API errors
MAX_QUERY_LENGTH = 300
if len(topic) > MAX_QUERY_LENGTH:
original_length = len(topic)
topic = topic[:MAX_QUERY_LENGTH] + "..."
# Try Semantic Scholar API first
semantic_url = f"{SEMANTIC_SCHOLAR_API}/paper/search"
params = {
"query": topic.encode('utf-8').decode('utf-8'),
"limit": limit,
"fields": "title,authors,year,paperId,externalIds,abstract,venue,isOpenAccess,openAccessPdf,tldr"
}
if year_start and year_end:
params["year"] = f"{year_start}-{year_end}"
headers = {
"Accept": "application/json",
"Content-Type": "application/json; charset=utf-8"
}
data = await make_api_request(semantic_url, headers=headers, params=params)
if data and 'data' in data:
results = ["=== Search Results ==="]
for paper in data['data']:
results.append(format_paper_data(paper, "semantic_scholar"))
return "\n".join(results)
# Fallback to Crossref if Semantic Scholar fails
return await search_papers(topic, limit)
except Exception as e:
return f"Error searching papers!"
if __name__ == "__main__":
# Initialize and run the server
mcp.run(transport='stdio')