"""dbSNP MCP Plugin

An MCP server exposing dbSNP lookups (via the NLM Clinical Tables API) plus
PubMed article metadata and open-access full-text retrieval tools.

by mattshu0410
"""
import logging
import os
import shutil
import sys
import tempfile
from typing import Any, List
from urllib.parse import quote

import aiofiles
import httpx
import pymupdf4llm
from mcp.server.fastmcp import FastMCP
from metapub import FindIt, PubMedFetcher, convert
# Configure logging to stderr so stdout stays clean for the MCP stdio transport
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    stream=sys.stderr,
)
logger = logging.getLogger(__name__)
# Initialize FastMCP server and fetchers
mcp = FastMCP("dbSNP")
pubmed_fetch = PubMedFetcher()
# Constants
SNP_API_BASE = "https://clinicaltables.nlm.nih.gov/api/snps/v3/search"
USER_AGENT = "dbsnp-app/1.0"
# Display fields requested from the Clinical Tables API, covering both assemblies
DISPLAY_FIELDS = [
'rsNum',
'38.chr', '38.pos', '38.alleles', '38.gene', '38.assembly',
'37.chr', '37.pos', '37.alleles', '37.gene', '37.assembly'
]
async def fetch_snp_data(query: str) -> List[Any]:
"""Helper function to fetch SNP data from the API"""
logger.info(f"[API] Fetching SNP data for query: {query}")
try:
async with httpx.AsyncClient() as client:
response = await client.get(
                SNP_API_BASE,
params={
"terms": query,
"df": ",".join(DISPLAY_FIELDS)
},
headers={"User-Agent": USER_AGENT}
)
response.raise_for_status()
data = response.json()
logger.info(f"[API] Successfully fetched data for {query}")
return data
except httpx.HTTPError as e:
logger.error(f"[Error] HTTP error occurred: {str(e)}")
raise
except Exception as e:
logger.error(f"[Error] Unexpected error: {str(e)}")
raise
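
# For reference, a sketch of the payload the helper above returns, based on the
# Clinical Tables API's documented response shape (values illustrative, not
# real data): a four-element array of
#   [total_match_count, codes, extra_data, display_strings]
# where display_strings holds one list of field values per match, ordered as in
# DISPLAY_FIELDS, e.g.
#   [1, ["rs328"], None,
#    [["rs328", "8", "19962213", "C/G", "LPL", "GRCh38",
#      "8", "19819724", "C/G", "LPL", "GRCh37"]]]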
def format_snp_data(item: List[str]) -> str:
"""Format SNP data into a readable string"""
if len(item) != len(DISPLAY_FIELDS):
return f"SNP ID: {item[0]} (incomplete data)"
data = dict(zip(DISPLAY_FIELDS, item))
# Format GRCh38 data
grch38_info = [
f"GRCh38 Assembly:",
f" Chromosome: {data['38.chr']}",
f" Position: {data['38.pos']}",
f" Alleles: {data['38.alleles']}",
f" Gene: {data['38.gene'] or 'Not specified'}"
]
# Format GRCh37 data
grch37_info = [
f"GRCh37 Assembly:",
f" Chromosome: {data['37.chr']}",
f" Position: {data['37.pos']}",
f" Alleles: {data['37.alleles']}",
f" Gene: {data['37.gene'] or 'Not specified'}"
]
# Combine all information
return "\n".join([
f"SNP ID: {data['rsNum']}",
"",
*grch38_info,
"",
*grch37_info
])
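
# Sample output of format_snp_data for a complete record (illustrative values):
#
#   SNP ID: rs328
#
#   GRCh38 Assembly:
#     Chromosome: 8
#     Position: 19962213
#     Alleles: C/G
#     Gene: LPL
#
#   GRCh37 Assembly:
#     Chromosome: 8
#     Position: 19819724
#     Alleles: C/G
#     Gene: LPL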
@mcp.tool()
async def search_snp(query: str) -> str:
"""
Search for SNP information by rsID or genomic location.
Args:
query: SNP identifier (e.g., rs1234) or genomic location
Returns:
Formatted string containing comprehensive SNP information for both assemblies
"""
try:
data = await fetch_snp_data(query)
if not data or len(data) < 4 or not data[3]:
return f"No SNP information found for query: {query}"
results = []
for item in data[3]:
results.append(format_snp_data(item))
total_results = data[0]
summary = f"Found {total_results} matching SNPs. Showing first {len(results)}:\n\n"
return summary + "\n\n==========\n\n".join(results)
except Exception as e:
return f"Error searching for SNP: {str(e)}"
@mcp.resource("snp://{rsid}")
async def get_snp_info(rsid: str) -> str:
"""
Get detailed information about a specific SNP by rsID.
Args:
rsid: SNP identifier (e.g., rs1234)
Returns:
Detailed SNP information as a string, including data from both assemblies
"""
try:
data = await fetch_snp_data(rsid)
if not data or len(data) < 4 or not data[3]:
return f"No information found for SNP: {rsid}"
# Return first match details
return format_snp_data(data[3][0])
except Exception as e:
return f"Error retrieving SNP information: {str(e)}"
@mcp.prompt()
def snp_lookup_help() -> str:
"""Provide help information for SNP lookups"""
return """
To look up SNP information, you can:
1. Search for SNPs using the search_snp tool with a query
2. Get specific SNP details using the snp://{rsid} resource
The information returned includes:
- SNP ID (rsNum)
- GRCh38 Assembly data:
* Chromosome location
* Position
* Alleles
* Associated gene
- GRCh37 Assembly data:
* Chromosome location
* Position
* Alleles
* Associated gene
Examples:
- search_snp("rs1234")
- Resource: snp://rs1234
"""
@mcp.tool()
async def get_articles(dois: List[str]) -> str:
"""
Comprehensive article information retrieval tool. For each DOI, returns:
- Basic article metadata (title, authors, journal, etc.)
- Full abstract
- Full text URL (if available)
Args:
dois: List of Digital Object Identifiers
Returns:
Formatted string containing complete information for all articles,
including any errors encountered during processing
"""
results = []
for doi in dois:
try:
logger.info(f"[Article] Processing DOI: {doi}")
article_info = [
"Article Information:",
f"DOI: {doi}"
]
# Get PMID
try:
pmid = convert.doi2pmid(doi)
if not pmid:
raise ValueError("Could not convert DOI to PMID")
article_info.append(f"PMID: {pmid}")
except Exception as e:
logger.error(f"[Error] Failed to get PMID for DOI {doi}: {str(e)}")
article_info.append(f"Error: Could not retrieve PMID - {str(e)}")
results.append("\n".join(article_info))
continue
# Fetch article information
try:
article = pubmed_fetch.article_by_pmid(pmid)
if not article:
raise ValueError("No article data returned")
# Basic metadata
article_info.extend([
f"Title: {article.title}",
f"Authors: {'; '.join(article.authors)}",
f"Journal: {article.journal} ({article.year})",
f"Citation: {article.citation}"
])
# Abstract
article_info.extend([
"",
"Abstract:",
article.abstract if article.abstract else "Not available"
])
except Exception as e:
logger.error(f"[Error] Failed to fetch article data for PMID {pmid}: {str(e)}")
article_info.append(f"Error: Could not retrieve article data - {str(e)}")
results.append("\n".join(article_info))
continue
# Full text URL
try:
finder = FindIt(pmid)
article_info.extend([
"",
"Full Text Access:"
])
if finder.url:
article_info.append(f"URL: {finder.url}")
else:
article_info.extend([
"Full text URL not available",
f"Reason: {finder.reason}"
])
except Exception as e:
logger.error(f"[Error] Failed to find full text for PMID {pmid}: {str(e)}")
article_info.extend([
"",
"Full Text Access:",
f"Error: Could not retrieve full text URL - {str(e)}"
])
results.append("\n".join(article_info))
except Exception as e:
# Catch any unexpected errors for this DOI
logger.error(f"[Error] Unexpected error processing DOI {doi}: {str(e)}")
results.append(f"Failed to process DOI: {doi}\nError: {str(e)}")
# Return results with clear separation between articles
if not results:
return "No articles were processed successfully."
return "\n\n==========\n\n".join(results)
@mcp.tool()
async def get_full_text_markdown(dois: List[str]) -> str:
"""
Downloads open access PDFs using Unpaywall API and converts them to markdown text.
Args:
dois: List of Digital Object Identifiers
Returns:
Formatted string containing markdown text for each successfully processed PDF,
with error messages for any failures
"""
results = []
temp_dir = tempfile.mkdtemp(prefix="pdf_processing_")
email = "matthews1000140@gmail.com" # Required for Unpaywall API
try:
async with httpx.AsyncClient() as client:
for doi in dois:
try:
logger.info(f"[PDF] Processing DOI: {doi}")
result_info = [
"PDF Processing Results:",
f"DOI: {doi}"
                    ]
                    filename = None  # set once the PDF save path is known

                    # Query Unpaywall API
try:
unpaywall_url = f"https://api.unpaywall.org/v2/{quote(doi)}?email={email}"
response = await client.get(unpaywall_url)
response.raise_for_status()
data = response.json()
# Check if there's an open access PDF
best_oa_location = None
if data.get("is_oa", False):
# Try to find best PDF URL from available OA locations
oa_locations = data.get("oa_locations", [])
for location in oa_locations:
if location.get("url_for_pdf"):
best_oa_location = location
break
if not best_oa_location and oa_locations:
# Fallback to first location with any URL
best_oa_location = oa_locations[0]
if not best_oa_location:
raise ValueError("No open access PDF available")
pdf_url = best_oa_location.get("url_for_pdf") or best_oa_location.get("url")
if not pdf_url:
raise ValueError("No PDF URL found in open access location")
# Create a safe filename
filename = os.path.join(temp_dir, f"{doi.replace('/', '_')}.pdf")
# Download PDF
pdf_response = await client.get(pdf_url, follow_redirects=True)
pdf_response.raise_for_status()
# Verify it's a PDF
content_type = pdf_response.headers.get('content-type', '')
if 'pdf' not in content_type.lower():
raise ValueError(f"Not a PDF file (content-type: {content_type})")
# Save PDF
async with aiofiles.open(filename, 'wb') as f:
await f.write(pdf_response.content)
# Convert to markdown
md_text = pymupdf4llm.to_markdown(filename)
result_info.extend([
f"Open Access Source: {best_oa_location.get('host_type', 'unknown')}",
"",
"Markdown Content:",
md_text
])
except httpx.HTTPError as e:
logger.error(f"[Error] API or download error for DOI {doi}: {str(e)}")
result_info.append(f"Error: Failed to retrieve or download PDF - {str(e)}")
except Exception as e:
logger.error(f"[Error] Failed to process PDF for DOI {doi}: {str(e)}")
result_info.append(f"Error: Failed to process PDF - {str(e)}")
                    finally:
                        # Clean up the temporary file if it was created
                        if filename and os.path.exists(filename):
                            os.remove(filename)
except Exception as e:
# Catch any unexpected errors for this DOI
logger.error(f"[Error] Unexpected error processing DOI {doi}: {str(e)}")
result_info = [
"PDF Processing Results:",
f"DOI: {doi}",
f"Error: {str(e)}"
]
results.append("\n".join(result_info))
    finally:
        # Clean up the temporary directory and anything left inside it
        shutil.rmtree(temp_dir, ignore_errors=True)
# Return results with clear separation between PDFs
if not results:
return "No PDFs were processed successfully."
return "\n\n==========\n\n".join(results)
if __name__ == "__main__":
    try:
        # Run the server over stdio; FastMCP.run() is synchronous and manages
        # its own event loop, so it must not be wrapped in asyncio.run()
        mcp.run(transport="stdio")
    except Exception as e:
        logger.error(f"Server failed to start: {str(e)}")
        sys.exit(1)