"""dbSNP MCP Plugin

An MCP server exposing dbSNP lookups (via the NLM Clinical Tables API) plus
PubMed article metadata and open-access full-text retrieval tools.

by mattshu0410
"""
import logging
import os
import shutil
import sys
import tempfile
from typing import Any, List
from urllib.parse import quote

import aiofiles
import httpx
import pymupdf4llm
from mcp.server.fastmcp import FastMCP
from metapub import FindIt, PubMedFetcher, convert
# Configure logging to stderr so stdout stays clean for the MCP stdio transport
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    stream=sys.stderr,
)
logger = logging.getLogger(__name__)
# Initialize FastMCP server and fetchers
mcp = FastMCP("dbSNP")
pubmed_fetch = PubMedFetcher()
# Constants
SNP_API_BASE = "https://clinicaltables.nlm.nih.gov/api/snps/v3/search"
USER_AGENT = "dbsnp-app/1.0"
# Display fields requested from the Clinical Tables API, covering both assemblies
DISPLAY_FIELDS = [
'rsNum',
'38.chr', '38.pos', '38.alleles', '38.gene', '38.assembly',
'37.chr', '37.pos', '37.alleles', '37.gene', '37.assembly'
]
async def fetch_snp_data(query: str) -> List[Any]:
"""Helper function to fetch SNP data from the API"""
logger.info(f"[API] Fetching SNP data for query: {query}")
try:
async with httpx.AsyncClient() as client:
response = await client.get(
                SNP_API_BASE,
params={
"terms": query,
"df": ",".join(DISPLAY_FIELDS)
},
headers={"User-Agent": USER_AGENT}
)
response.raise_for_status()
data = response.json()
logger.info(f"[API] Successfully fetched data for {query}")
return data
except httpx.HTTPError as e:
logger.error(f"[Error] HTTP error occurred: {str(e)}")
raise
except Exception as e:
logger.error(f"[Error] Unexpected error: {str(e)}")
raise
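
# For reference, a sketch of the payload the helper above returns, based on the
# Clinical Tables API's documented response shape (values illustrative, not
# real data): a four-element array of
#   [total_match_count, codes, extra_data, display_strings]
# where display_strings holds one list of field values per match, ordered as in
# DISPLAY_FIELDS, e.g.
#   [1, ["rs328"], None,
#    [["rs328", "8", "19962213", "C/G", "LPL", "GRCh38",
#      "8", "19819724", "C/G", "LPL", "GRCh37"]]]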
def format_snp_data(item: List[str]) -> str:
"""Format SNP data into a readable string"""
if len(item) != len(DISPLAY_FIELDS):
return f"SNP ID: {item[0]} (incomplete data)"
data = dict(zip(DISPLAY_FIELDS, item))
# Format GRCh38 data
grch38_info = [
f"GRCh38 Assembly:",
f" Chromosome: {data['38.chr']}",
f" Position: {data['38.pos']}",
f" Alleles: {data['38.alleles']}",
f" Gene: {data['38.gene'] or 'Not specified'}"
]
# Format GRCh37 data
grch37_info = [
f"GRCh37 Assembly:",
f" Chromosome: {data['37.chr']}",
f" Position: {data['37.pos']}",
f" Alleles: {data['37.alleles']}",
f" Gene: {data['37.gene'] or 'Not specified'}"
]
# Combine all information
return "\n".join([
f"SNP ID: {data['rsNum']}",
"",
*grch38_info,
"",
*grch37_info
])
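
# Sample output of format_snp_data for a complete record (illustrative values):
#
#   SNP ID: rs328
#
#   GRCh38 Assembly:
#     Chromosome: 8
#     Position: 19962213
#     Alleles: C/G
#     Gene: LPL
#
#   GRCh37 Assembly:
#     Chromosome: 8
#     Position: 19819724
#     Alleles: C/G
#     Gene: LPL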
@mcp.tool()
async def search_snp(query: str) -> str:
"""
Search for SNP information by rsID or genomic location.
Args:
query: SNP identifier (e.g., rs1234) or genomic location
Returns:
Formatted string containing comprehensive SNP information for both assemblies
"""
try:
data = await fetch_snp_data(query)
if not data or len(data) < 4 or not data[3]:
return f"No SNP information found for query: {query}"
results = []
for item in data[3]:
results.append(format_snp_data(item))
total_results = data[0]
summary = f"Found {total_results} matching SNPs. Showing first {len(results)}:\n\n"
return summary + "\n\n==========\n\n".join(results)
except Exception as e:
return f"Error searching for SNP: {str(e)}"
@mcp.resource("snp://{rsid}")
async def get_snp_info(rsid: str) -> str:
"""
Get detailed information about a specific SNP by rsID.
Args:
rsid: SNP identifier (e.g., rs1234)
Returns:
Detailed SNP information as a string, including data from both assemblies
"""
try:
data = await fetch_snp_data(rsid)
if not data or len(data) < 4 or not data[3]:
return f"No information found for SNP: {rsid}"
# Return first match details
return format_snp_data(data[3][0])
except Exception as e:
return f"Error retrieving SNP information: {str(e)}"
@mcp.prompt()
def snp_lookup_help() -> str:
"""Provide help information for SNP lookups"""
return """
To look up SNP information, you can:
1. Search for SNPs using the search_snp tool with a query
2. Get specific SNP details using the snp://{rsid} resource
The information returned includes:
- SNP ID (rsNum)
- GRCh38 Assembly data:
* Chromosome location
* Position
* Alleles
* Associated gene
- GRCh37 Assembly data:
* Chromosome location
* Position
* Alleles
* Associated gene
Examples:
- search_snp("rs1234")
- Resource: snp://rs1234
"""
@mcp.tool()
async def get_articles(dois: List[str]) -> str:
"""
Comprehensive article information retrieval tool. For each DOI, returns:
- Basic article metadata (title, authors, journal, etc.)
- Full abstract
- Full text URL (if available)
Args:
dois: List of Digital Object Identifiers
Returns:
Formatted string containing complete information for all articles,
including any errors encountered during processing
"""
results = []
for doi in dois:
try:
logger.info(f"[Article] Processing DOI: {doi}")
article_info = [
"Article Information:",
f"DOI: {doi}"
]
# Get PMID
try:
pmid = convert.doi2pmid(doi)
if not pmid:
raise ValueError("Could not convert DOI to PMID")
article_info.append(f"PMID: {pmid}")
except Exception as e:
logger.error(f"[Error] Failed to get PMID for DOI {doi}: {str(e)}")
article_info.append(f"Error: Could not retrieve PMID - {str(e)}")
results.append("\n".join(article_info))
continue
# Fetch article information
try:
article = pubmed_fetch.article_by_pmid(pmid)
if not article:
raise ValueError("No article data returned")
# Basic metadata
article_info.extend([
f"Title: {article.title}",
f"Authors: {'; '.join(article.authors)}",
f"Journal: {article.journal} ({article.year})",
f"Citation: {article.citation}"
])
# Abstract
article_info.extend([
"",
"Abstract:",
article.abstract if article.abstract else "Not available"
])
except Exception as e:
logger.error(f"[Error] Failed to fetch article data for PMID {pmid}: {str(e)}")
article_info.append(f"Error: Could not retrieve article data - {str(e)}")
results.append("\n".join(article_info))
continue
# Full text URL
try:
finder = FindIt(pmid)
article_info.extend([
"",
"Full Text Access:"
])
if finder.url:
article_info.append(f"URL: {finder.url}")
else:
article_info.extend([
"Full text URL not available",
f"Reason: {finder.reason}"
])
except Exception as e:
logger.error(f"[Error] Failed to find full text for PMID {pmid}: {str(e)}")
article_info.extend([
"",
"Full Text Access:",
f"Error: Could not retrieve full text URL - {str(e)}"
])
results.append("\n".join(article_info))
except Exception as e:
# Catch any unexpected errors for this DOI
logger.error(f"[Error] Unexpected error processing DOI {doi}: {str(e)}")
results.append(f"Failed to process DOI: {doi}\nError: {str(e)}")
# Return results with clear separation between articles
if not results:
return "No articles were processed successfully."
return "\n\n==========\n\n".join(results)
@mcp.tool()
async def get_full_text_markdown(dois: List[str]) -> str:
"""
Downloads open access PDFs using Unpaywall API and converts them to markdown text.
Args:
dois: List of Digital Object Identifiers
Returns:
Formatted string containing markdown text for each successfully processed PDF,
with error messages for any failures
"""
results = []
temp_dir = tempfile.mkdtemp(prefix="pdf_processing_")
email = "matthews1000140@gmail.com" # Required for Unpaywall API
try:
async with httpx.AsyncClient() as client:
for doi in dois:
try:
logger.info(f"[PDF] Processing DOI: {doi}")
result_info = [
"PDF Processing Results:",
f"DOI: {doi}"
                    ]
                    filename = None  # set once the PDF save path is known

                    # Query Unpaywall API
try:
unpaywall_url = f"https://api.unpaywall.org/v2/{quote(doi)}?email={email}"
response = await client.get(unpaywall_url)
response.raise_for_status()
data = response.json()
# Check if there's an open access PDF
best_oa_location = None
if data.get("is_oa", False):
# Try to find best PDF URL from available OA locations
oa_locations = data.get("oa_locations", [])
for location in oa_locations:
if location.get("url_for_pdf"):
best_oa_location = location
break
if not best_oa_location and oa_locations:
# Fallback to first location with any URL
best_oa_location = oa_locations[0]
if not best_oa_location:
raise ValueError("No open access PDF available")
pdf_url = best_oa_location.get("url_for_pdf") or best_oa_location.get("url")
if not pdf_url:
raise ValueError("No PDF URL found in open access location")
# Create a safe filename
filename = os.path.join(temp_dir, f"{doi.replace('/', '_')}.pdf")
# Download PDF
pdf_response = await client.get(pdf_url, follow_redirects=True)
pdf_response.raise_for_status()
# Verify it's a PDF
content_type = pdf_response.headers.get('content-type', '')
if 'pdf' not in content_type.lower():
raise ValueError(f"Not a PDF file (content-type: {content_type})")
# Save PDF
async with aiofiles.open(filename, 'wb') as f:
await f.write(pdf_response.content)
# Convert to markdown
md_text = pymupdf4llm.to_markdown(filename)
result_info.extend([
f"Open Access Source: {best_oa_location.get('host_type', 'unknown')}",
"",
"Markdown Content:",
md_text
])
except httpx.HTTPError as e:
logger.error(f"[Error] API or download error for DOI {doi}: {str(e)}")
result_info.append(f"Error: Failed to retrieve or download PDF - {str(e)}")
except Exception as e:
logger.error(f"[Error] Failed to process PDF for DOI {doi}: {str(e)}")
result_info.append(f"Error: Failed to process PDF - {str(e)}")
                    finally:
                        # Clean up the temporary file if it was created
                        if filename and os.path.exists(filename):
                            os.remove(filename)
except Exception as e:
# Catch any unexpected errors for this DOI
logger.error(f"[Error] Unexpected error processing DOI {doi}: {str(e)}")
result_info = [
"PDF Processing Results:",
f"DOI: {doi}",
f"Error: {str(e)}"
]
results.append("\n".join(result_info))
    finally:
        # Clean up the temporary directory and anything left inside it
        shutil.rmtree(temp_dir, ignore_errors=True)
# Return results with clear separation between PDFs
if not results:
return "No PDFs were processed successfully."
return "\n\n==========\n\n".join(results)
if __name__ == "__main__":
    try:
        # Run the server over stdio; FastMCP.run() is synchronous and manages
        # its own event loop, so it must not be wrapped in asyncio.run()
        mcp.run(transport="stdio")
    except Exception as e:
        logger.error(f"Server failed to start: {str(e)}")
        sys.exit(1)