Rijksmuseum MCP Server
by r-huijts
- mcp_simple_pubmed
"""
Search functionality for PubMed using Bio.Entrez.
"""
import os
import time
import logging
import xml.etree.ElementTree as ET
from typing import List, Dict, Optional, Any
from Bio import Entrez
# Configure logging: set the root logger to INFO level when this module is
# imported, and create the named logger used by the PubMed search client below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("pubmed-search")
class PubMedSearch:
    """Client for searching PubMed articles using Bio.Entrez."""

    def __init__(self, email: str, tool: str, api_key: Optional[str] = None):
        """Initialize PubMed search client with required credentials.

        The credentials are stored on the instance and also assigned to the
        module-level ``Bio.Entrez`` settings.

        Args:
            email: Valid email address for API access
            tool: Unique identifier for the tool
            api_key: Optional API key for higher rate limits

        Raises:
            ValueError: If ``email`` is empty.
        """
        if not email:
            raise ValueError("Email is required for PubMed search")

        self.email = email
        self.tool = tool
        self.api_key = api_key

        # Configure Entrez
        Entrez.email = email
        Entrez.tool = tool
        if api_key:
            Entrez.api_key = api_key

        logger.info(f"PubMed search initialized with email: {email}, tool: {tool}")

    async def search_articles(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Search for articles matching the query.

        The query is normalized first (see ``_normalize_query``), then the
        matching PMIDs are looked up and each article is fetched and parsed
        individually. An article whose fetch or parse fails is logged and
        skipped; the remaining articles are still returned.

        NOTE(review): the Entrez calls below are made synchronously (there is
        no ``await`` in this coroutine), so it presumably blocks the event loop
        during network I/O - confirm whether callers need it offloaded.

        Args:
            query: Search query string
            max_results: Maximum number of results to return

        Returns:
            List of article metadata dictionaries. An empty list is returned
            when nothing matches or when the search itself fails.
        """
        try:
            query = self._normalize_query(query)
            logger.info(f"Searching PubMed with query: {query}")

            # Step 1: Search for article IDs
            try:
                handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
                try:
                    search_results = Entrez.read(handle)
                finally:
                    # Close the handle even when parsing the response fails.
                    handle.close()
                # Debug info
                logger.info(f"Total results found: {search_results.get('Count', 'Unknown')}")
            except Exception as e:
                logger.error(f"Error in PubMed search: {str(e)}")
                return []

            pmids = search_results.get('IdList')
            if not pmids:
                logger.info("No results found")
                return []
            logger.info(f"Found {len(pmids)} articles")

            # Step 2: Get details for each article
            results = []
            for pmid in pmids:
                logger.info(f"Fetching details for PMID {pmid}")
                try:
                    detail_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="xml")
                    try:
                        article_xml = detail_handle.read()
                    finally:
                        detail_handle.close()
                    article_root = ET.fromstring(article_xml)
                    results.append(self._parse_article(pmid, article_root))
                except Exception as e:
                    # Best effort: one bad record must not discard the others.
                    logger.error(f"Error fetching details for PMID {pmid}: {str(e)}")
                    continue
            return results
        except Exception as e:
            logger.exception(f"Error in search_articles: {str(e)}")
            return []

    def _normalize_query(self, query: str) -> str:
        """Rewrite date-field syntax in a query before it is sent to PubMed.

        ``[Date - Publication]`` is replaced by ``[PDAT]``. When the query is a
        single ``start[PDAT] : end`` range whose start has no quotes or
        parentheses, the start date is quoted and the range is wrapped in
        parentheses.

        Args:
            query: Raw search query string

        Returns:
            The normalized query string.
        """
        # Replace [Date - Publication] with [PDAT] in query
        query = query.replace("[Date - Publication]", "[PDAT]")

        # Make sure date ranges are properly formatted
        parts = query.split(":")
        if len(parts) != 2:
            return query

        before_range = parts[0].strip()
        after_range = parts[1].strip()
        if "[PDAT]" not in before_range or any(c in before_range for c in '"()'):
            return query

        # Add quotes and parentheses for date range
        date = before_range.replace("[PDAT]", "").strip()
        query = f'("{date}"[PDAT] : {after_range}'
        # Close the parenthesis opened above unless the end of the range
        # already supplies one; otherwise the term would be unbalanced.
        if query.count("(") > query.count(")"):
            query += ")"
        return query

    def _parse_article(self, pmid: str, article_root: ET.Element) -> Dict[str, Any]:
        """Build the metadata dictionary for one fetched article.

        Args:
            pmid: PubMed ID of the article
            article_root: Root element of the parsed efetch XML response

        Returns:
            Dictionary with ``pmid``, ``title``, ``abstract``, ``journal``,
            ``authors``, ``urls``, ``abstract_uri`` and ``full_text_uri``, plus
            ``publication_date``, ``doi`` and ``pmc_id`` when present in the XML.
        """
        # Get basic article data
        article: Dict[str, Any] = {
            "pmid": pmid,
            "title": self._get_xml_text(article_root, './/ArticleTitle') or "No title",
            "abstract": self._get_abstract_text(article_root) or "No abstract available",
            "journal": self._get_xml_text(article_root, './/Journal/Title') or "",
            "authors": [],
        }

        # Get authors
        for author in article_root.findall('.//Author'):
            last_name = self._get_xml_text(author, 'LastName') or ""
            fore_name = self._get_xml_text(author, 'ForeName') or ""
            if last_name or fore_name:
                article["authors"].append(f"{last_name} {fore_name}".strip())

        # Get publication date
        pub_date = article_root.find('.//PubDate')
        if pub_date is not None:
            article["publication_date"] = {
                "year": self._get_xml_text(pub_date, 'Year'),
                "month": self._get_xml_text(pub_date, 'Month'),
                "day": self._get_xml_text(pub_date, 'Day'),
            }

        # Get article identifiers (DOI, PMC). Only the article's own ID list is
        # read: a bare './/ArticleId' would also match the IDs of cited papers
        # nested under ReferenceList and let them overwrite the article's own.
        own_id_paths = (
            './/PubmedData/ArticleIdList/ArticleId',
            './/PubmedBookData/ArticleIdList/ArticleId',
        )
        for id_path in own_id_paths:
            for article_id in article_root.findall(id_path):
                id_type = article_id.get('IdType')
                if id_type == 'doi':
                    article["doi"] = article_id.text
                elif id_type == 'pmc':
                    article["pmc_id"] = article_id.text

        # Add URLs
        article["urls"] = self._generate_urls(
            pmid, article.get("doi"), article.get("pmc_id")
        )

        # Add resource URIs
        article["abstract_uri"] = f"pubmed://{pmid}/abstract"
        article["full_text_uri"] = f"pubmed://{pmid}/full_text"
        return article

    def _get_abstract_text(self, article_root: ET.Element) -> Optional[str]:
        """Collect every abstract section of an article into one string.

        Each non-empty ``Abstract/AbstractText`` element becomes one line,
        prefixed with its ``Label`` attribute when it has one.

        Args:
            article_root: Root element of the parsed efetch XML response

        Returns:
            The sections joined by newlines, or None when there is no
            abstract text.
        """
        sections = []
        for node in article_root.findall('.//Abstract/AbstractText'):
            text = "".join(node.itertext()).strip()
            if not text:
                continue
            label = node.get('Label')
            sections.append(f"{label}: {text}" if label else text)
        return "\n".join(sections) or None

    def _get_xml_text(self, elem: Optional[ET.Element], xpath: str) -> Optional[str]:
        """Helper method to safely get text from XML element.

        Args:
            elem: Element to search from, or None
            xpath: ElementTree path of the wanted child element

        Returns:
            The full text of the first match, including text nested inside
            inline child elements, or None when ``elem`` is None, nothing
            matches, or the match has no text.
        """
        if elem is None:
            return None
        found = elem.find(xpath)
        if found is None:
            return None
        # itertext() also yields text inside and after child elements, which
        # ``found.text`` alone would drop.
        return "".join(found.itertext()) or None

    def _generate_urls(self, pmid: str, doi: Optional[str] = None, pmc_id: Optional[str] = None) -> Dict[str, str]:
        """Generate URLs for human access.

        Args:
            pmid: PubMed ID
            doi: Optional DOI
            pmc_id: Optional PMC ID

        Returns:
            Dictionary with URLs: always ``pubmed`` and ``pubmed_mobile``, plus
            ``doi`` and ``pmc`` when the corresponding identifier is given.
        """
        urls = {
            "pubmed": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            "pubmed_mobile": f"https://m.pubmed.ncbi.nlm.nih.gov/{pmid}/",
        }
        if doi:
            urls["doi"] = f"https://doi.org/{doi}"
        if pmc_id:
            urls["pmc"] = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/"
        return urls