Skip to main content
Glama

mcp-simple-pubmed

by andybrandt
MIT License
134
  • Apple
pubmed_client.py (8.87 kB)
"""
Client for interacting with PubMed/Entrez API.
"""
import os
import time
import logging
import http.client
import xml.etree.ElementTree as ET
from typing import List, Dict, Optional, Any

from Bio import Entrez

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("pubmed-client")


class PubMedClient:
    """Client for interacting with PubMed/Entrez API.

    Wraps ``Bio.Entrez`` ``esearch``/``efetch`` calls and converts the
    returned PubMed XML into plain dictionaries.
    """

    def __init__(self, email: str, tool: str, api_key: Optional[str] = None):
        """Initialize PubMed client with required credentials.

        Args:
            email: Valid email address for API access
            tool: Unique identifier for the tool
            api_key: Optional API key for higher rate limits
        """
        self.email = email
        self.tool = tool
        self.api_key = api_key

        # Configure Entrez. These are attributes of the imported module, so
        # the values are shared by everything in the process using Bio.Entrez.
        Entrez.email = email
        Entrez.tool = tool
        if api_key:
            Entrez.api_key = api_key

    async def search_articles(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Search for articles matching the query.

        Runs one ``esearch`` request to collect PMIDs, then fetches every
        article one at a time through ``get_article_details``. Articles whose
        details could not be fetched are silently left out of the result.

        Note: the Entrez calls are made without ``await``, so this coroutine
        does not yield to the event loop while a request is in flight.

        Args:
            query: Search query string
            max_results: Maximum number of results to return

        Returns:
            List of article metadata dictionaries (empty when nothing matched
            or when esearch did not return an HTTP response)

        Raises:
            Exception: any error raised while searching or parsing the search
                response is logged and re-raised unchanged.
        """
        try:
            logger.info("Searching PubMed with query: %s", query)

            # Step 1: Search for article IDs
            handle = Entrez.esearch(db="pubmed", term=query, retmax=str(max_results))
            if not handle:
                logger.error("Got None handle from esearch")
                return []

            # Always release the connection, even if read() fails or the
            # handle turns out not to be a raw HTTP response.
            try:
                if not isinstance(handle, http.client.HTTPResponse):
                    logger.warning(
                        "Unexpected handle type from esearch: %s",
                        type(handle).__name__,
                    )
                    return []
                logger.info("Got valid HTTP response from esearch")
                xml_content = handle.read()
            finally:
                handle.close()

            # Parse XML to get IDs; skip empty <Id/> elements so that None is
            # never passed on as a PMID.
            root = ET.fromstring(xml_content)
            pmids = [id_elem.text for id_elem in root.findall('.//Id') if id_elem.text]
            if not pmids:
                logger.info("No results found")
                return []
            logger.info("Found %d articles", len(pmids))

            # Step 2: Get details for each article
            results = []
            for pmid in pmids:
                article = await self.get_article_details(pmid)
                if article:
                    results.append(article)
            return results

        except Exception as e:
            logger.exception("Error in search_articles: %s", e)
            raise

    async def get_article_details(self, pmid: str) -> Optional[Dict[str, Any]]:
        """Get details for a specific article by PMID.

        Args:
            pmid: PubMed ID of the article

        Returns:
            Dictionary with article metadata, or None when efetch returns no
            handle, the handle is not an HTTP response, or any error occurs
            (errors are logged, never raised). The keys "publication_date",
            "doi" and "pmcid" are only present when the XML provides them.
        """
        try:
            logger.info("Fetching details for PMID %s", pmid)
            detail_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="xml")
            if not detail_handle:
                return None

            # Close the handle on every path, including read() failures.
            try:
                if not isinstance(detail_handle, http.client.HTTPResponse):
                    return None
                article_xml = detail_handle.read()
            finally:
                detail_handle.close()

            return self._parse_article(pmid, ET.fromstring(article_xml))

        except Exception as e:
            logger.exception("Error getting article details for PMID %s: %s", pmid, e)
            return None

    def _parse_article(self, pmid: str, article_root: ET.Element) -> Dict[str, Any]:
        """Build the article metadata dictionary from a parsed efetch response."""
        article: Dict[str, Any] = {
            "pmid": pmid,
            "title": self._get_xml_text(article_root, './/ArticleTitle') or "No title",
            "abstract": self._get_full_abstract(article_root) or "No abstract available",
            "journal": self._get_xml_text(article_root, './/Journal/Title') or "",
            "authors": self._get_authors(article_root),
            "keywords": self._get_keywords(article_root),
            "mesh_terms": self._get_mesh_terms(article_root),
        }

        # Get publication date (each part may be None when absent)
        pub_date = article_root.find('.//PubDate')
        if pub_date is not None:
            article["publication_date"] = {
                "year": self._get_xml_text(pub_date, 'Year'),
                "month": self._get_xml_text(pub_date, 'Month'),
                "day": self._get_xml_text(pub_date, 'Day'),
            }

        # Get DOI and PMCID if available
        article.update(self._get_article_ids(article_root))
        return article

    def _get_authors(self, article_root: ET.Element) -> List[str]:
        """Return author names formatted as "LastName ForeName"."""
        authors = []
        for author in article_root.findall('.//Author'):
            last_name = self._get_xml_text(author, 'LastName') or ""
            fore_name = self._get_xml_text(author, 'ForeName') or ""
            if last_name or fore_name:
                authors.append(f"{last_name} {fore_name}".strip())
        return authors

    def _get_article_ids(self, article_root: ET.Element) -> Dict[str, Optional[str]]:
        """Return the "doi" and/or "pmcid" of the article itself, when present."""
        ids: Dict[str, Optional[str]] = {}
        # Important: Only get ArticleIds from the main ArticleIdList, not from
        # references. The direct-child lookup below avoids the ArticleIdList
        # elements nested inside ReferenceList.
        pubmed_data = article_root.find('.//PubmedData')
        if pubmed_data is None:
            return ids
        article_id_list_elem = pubmed_data.find('ArticleIdList')
        if article_id_list_elem is None:
            return ids
        for article_id in article_id_list_elem:
            id_type = article_id.get('IdType')
            if id_type == 'doi':
                ids["doi"] = article_id.text
            elif id_type == 'pmc':
                ids["pmcid"] = article_id.text
        return ids

    def _get_keywords(self, article_root: ET.Element) -> List[str]:
        """Return the cleaned, non-empty keywords of the article."""
        keywords = []
        for keyword in article_root.findall('.//Keyword'):
            text = self._element_text(keyword)
            if text:
                # Clean up keyword text (remove trailing periods, etc.)
                clean_keyword = text.strip().rstrip('.')
                if clean_keyword:
                    keywords.append(clean_keyword)
        return keywords

    def _get_mesh_terms(self, article_root: ET.Element) -> List[Dict[str, Any]]:
        """Return MeSH headings as dicts with descriptor, UI and qualifiers."""
        mesh_terms = []
        for mesh_heading in article_root.findall('.//MeshHeading'):
            descriptor = mesh_heading.find('DescriptorName')
            if descriptor is None or not descriptor.text:
                continue
            mesh_terms.append({
                "descriptor": descriptor.text,
                "ui": descriptor.get('UI', ''),
                # Get qualifiers if present
                "qualifiers": [
                    {"name": qualifier.text, "ui": qualifier.get('UI', '')}
                    for qualifier in mesh_heading.findall('QualifierName')
                    if qualifier.text
                ],
            })
        return mesh_terms

    @staticmethod
    def _element_text(elem: ET.Element) -> Optional[str]:
        """Return all text inside elem, including text nested in child tags.

        ``elem.text`` alone stops at the first child element, which truncates
        content carrying inline markup such as <i>, <sub> or <sup>. Returns
        None when the element holds no text at all.
        """
        return "".join(elem.itertext()) or None

    def _get_xml_text(self, elem: Optional[ET.Element], xpath: str) -> Optional[str]:
        """Helper method to safely get text from XML element.

        Args:
            elem: Element to search from (None is tolerated)
            xpath: ElementTree path of the wanted descendant

        Returns:
            Full text of the first match, or None when elem is None, nothing
            matches, or the match is empty.
        """
        if elem is None:
            return None
        found = elem.find(xpath)
        return self._element_text(found) if found is not None else None

    def _get_full_abstract(self, article_root: Optional[ET.Element]) -> Optional[str]:
        """Get complete abstract text, handling structured abstracts with multiple sections.

        Returns:
            None when there is no AbstractText element (or the only one is
            empty); the text of a single AbstractText as-is; otherwise all
            sections joined by blank lines, each prefixed with "LABEL: " when
            it carries a Label attribute.
        """
        if article_root is None:
            return None

        abstract_texts = article_root.findall('.//Abstract/AbstractText')
        if not abstract_texts:
            return None

        # If there's only one AbstractText element, return it directly
        if len(abstract_texts) == 1:
            return self._element_text(abstract_texts[0])

        # For structured abstracts with multiple sections
        abstract_parts = []
        for text_elem in abstract_texts:
            label = text_elem.get('Label')
            text = self._element_text(text_elem) or ""
            # Format as "LABEL: text" when the section is labelled
            abstract_parts.append(f"{label}: {text}" if label else text)

        # Join all parts with double newline for readability
        return "\n\n".join(abstract_parts)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/andybrandt/mcp-simple-pubmed'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.