Rijksmuseum MCP Server

  • mcp_simple_pubmed
""" Client for retrieving full text content of PubMed articles. Separate from main PubMed client to maintain code separation and stability. """ import logging import time import http.client from typing import Optional, Tuple from Bio import Entrez import xml.etree.ElementTree as ET # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger("pubmed-fulltext") class FullTextClient: """Client for retrieving full text content from PubMed Central.""" def __init__(self, email: str, tool: str, api_key: Optional[str] = None): """Initialize full text client with required credentials. Args: email: Valid email address for API access tool: Unique identifier for the tool api_key: Optional API key for higher rate limits """ self.email = email self.tool = tool self.api_key = api_key # Configure Entrez Entrez.email = email Entrez.tool = tool if api_key: Entrez.api_key = api_key async def check_full_text_availability(self, pmid: str) -> Tuple[bool, Optional[str]]: """Check if full text is available in PMC and get PMC ID if it exists. Args: pmid: PubMed ID of the article Returns: Tuple of (availability boolean, PMC ID if available) """ try: logger.info(f"Checking PMC availability for PMID {pmid}") handle = Entrez.elink(dbfrom="pubmed", db="pmc", id=pmid) if not handle: logger.info(f"No PMC link found for PMID {pmid}") return False, None xml_content = handle.read() handle.close() # Parse XML to get PMC ID root = ET.fromstring(xml_content) linksetdb = root.find(".//LinkSetDb") if linksetdb is None: logger.info(f"No PMC ID found for PMID {pmid}") return False, None id_elem = linksetdb.find(".//Id") if id_elem is None: logger.info(f"No PMC ID element found for PMID {pmid}") return False, None pmc_id = id_elem.text logger.info(f"Found PMC ID {pmc_id} for PMID {pmid}") return True, pmc_id except Exception as e: logger.exception(f"Error checking PMC availability for PMID {pmid}: {str(e)}") return False, None async def get_full_text(self, pmid: str) -> Optional[str]: """Get full text of the article if available through PMC. Handles truncated responses by making additional requests. Args: pmid: PubMed ID of the article Returns: Full text content if available, None otherwise """ try: # First check availability and get PMC ID available, pmc_id = await self.check_full_text_availability(pmid) if not available or pmc_id is None: logger.info(f"Full text not available in PMC for PMID {pmid}") return None logger.info(f"Fetching full text for PMC ID {pmc_id}") content = "" retstart = 0 while True: full_text_handle = Entrez.efetch( db="pmc", id=pmc_id, rettype="xml", retstart=retstart ) if not full_text_handle: break chunk = full_text_handle.read() full_text_handle.close() if isinstance(chunk, bytes): chunk = chunk.decode('utf-8') content += chunk # Check if there might be more content if "[truncated]" not in chunk and "Result too long" not in chunk: break # Increment retstart for next chunk retstart += len(chunk) # Add small delay to respect API rate limits time.sleep(0.5) return content except Exception as e: logger.exception(f"Error getting full text for PMID {pmid}: {str(e)}") return None