PubMed MCP Server

pubmed_search.py•8.48 kB

"""PubMed MCP server.

Exposes one MCP tool (``search_pubmed``) that queries PubMed through
Biopython's Entrez client, and one MCP prompt (``precise_pubmed_query``) that
asks a model to craft a query and run that tool.
"""

import re
import sys
import traceback
from typing import List
from urllib.error import HTTPError

from Bio import Entrez
from fastmcp import FastMCP

# Configure Entrez email (required by NCBI)
Entrez.email = 'user@example.com'  # Replace with your actual email

mcp = FastMCP(name="PubMed-MCP")


def _log(message: str) -> None:
    """Write a diagnostic line to stderr.

    The server is started with the stdio transport (see the ``__main__``
    guard), where stdout carries the MCP protocol messages. Diagnostics must
    therefore never be printed to stdout.
    """
    print(message, file=sys.stderr)


def _read_and_close(handle):
    """Parse an Entrez response handle and close it, even if parsing fails."""
    try:
        return Entrez.read(handle)
    finally:
        handle.close()


def _build_field_query(user_query: str, in_title: bool, in_abstract: bool, in_keywords: bool) -> str:
    """Build a PubMed (Entrez) query applying field restrictions.

    Field tags used:
        - Title: [ti]
        - Abstract: [ab]
        - Title/Abstract convenience: [tiab]
        - Keywords (Other Term): [ot] (author-provided keywords)
        - MeSH Headings: [mh]

    Strategy:
        - Title and abstract both selected: use [tiab].
        - Only one of them selected: use that specific tag.
        - Neither selected: leave the query untagged (PubMed default fields).
        - Keywords selected: OR the core clause with [ot] and [mh] clauses.
        - Parentheses keep the boolean grouping explicit.

    Args:
        user_query: The user's query text, inserted verbatim inside parentheses.
        in_title: Restrict the core clause to the title.
        in_abstract: Restrict the core clause to the abstract.
        in_keywords: Broaden the search with author keywords and MeSH terms.

    Returns:
        The assembled query string.
    """
    if in_title and in_abstract:
        # tiab covers both Title and Abstract text
        core_clause = f"({user_query})[tiab]"
    elif in_title:
        core_clause = f"({user_query})[ti]"
    elif in_abstract:
        core_clause = f"({user_query})[ab]"
    else:
        # No title/abstract restriction requested: pass the query through as-is.
        core_clause = f"({user_query})"

    if not in_keywords:
        return core_clause

    # Author keywords (ot) and MeSH terms (mh) are OR-ed in to broaden the search.
    keywords_clause = f"({user_query})[ot] OR ({user_query})[mh]"
    return f"({core_clause}) OR ({keywords_clause})"


def _format_authors(article) -> List[str]:
    """Return display names for the article's authors.

    Personal authors become "ForeName LastName" (or just the last name when no
    forename is present); group authors fall back to their CollectiveName.
    """
    names: List[str] = []
    for author in article.get('AuthorList', []):
        if 'ForeName' in author and 'LastName' in author:
            names.append(f"{author['ForeName']} {author['LastName']}")
        elif 'LastName' in author:
            names.append(str(author['LastName']))
        elif 'CollectiveName' in author:
            # Consortia / study groups carry no personal name fields.
            names.append(str(author['CollectiveName']))
    return names


def _format_abstract(article) -> str:
    """Return the abstract text, prefixing structured sections with their label."""
    if 'Abstract' not in article or 'AbstractText' not in article['Abstract']:
        return "No abstract found"

    sections = article['Abstract']['AbstractText']
    if not isinstance(sections, list):
        return str(sections)

    texts = []
    for section in sections:
        # Parsed Entrez string elements expose XML attributes through
        # `.attributes`; they are plain str subclasses without a `.get` method.
        label = (getattr(section, 'attributes', None) or {}).get('Label')
        texts.append(f"{label}: {section}" if label else str(section))
    return " ".join(texts)


def _publication_date(article):
    """Return ``(year, month)`` strings for the article, 'Unknown' when absent."""
    pub_date = article.get('Journal', {}).get('JournalIssue', {}).get('PubDate', {})
    year = str(pub_date.get('Year', 'Unknown'))
    month = str(pub_date.get('Month', 'Unknown'))
    if year == 'Unknown' and 'MedlineDate' in pub_date:
        # Some records only carry a free-form MedlineDate (e.g. "1998 Dec-1999 Jan");
        # take its first four-digit run as the year.
        found = re.search(r"\d{4}", str(pub_date['MedlineDate']))
        if found:
            year = found.group(0)
    return year, month


def _parse_article(record) -> dict:
    """Convert one parsed ``PubmedArticle`` record into the tool's result dict."""
    pmid = str(record['MedlineCitation']['PMID'])
    article = record['MedlineCitation']['Article']
    year, month = _publication_date(article)
    return {
        "pmid": pmid,
        "title": str(article.get('ArticleTitle', 'No title found')),
        "authors": _format_authors(article),
        "abstract": _format_abstract(article),
        "journal": str(article.get('Journal', {}).get('Title', 'No journal found')),
        "publication_year": year,
        "publication_month": month,
        "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
    }


@mcp.tool
def search_pubmed(query: str, max_results: int = 10, title: bool = True, abstract: bool = True, keywords: bool = True):
    """Search PubMed and return a list of article JSON objects.

    Parameters:
        query: Free-text user query; boolean operators (AND/OR/NOT) supported by PubMed.
        max_results: Maximum number of records to retrieve (retmax).
            Non-positive values fall back to 10.
        title: If True, include Title field in search restriction (ti / tiab).
        abstract: If True, include Abstract field in search restriction (ab / tiab).
        keywords: If True, expand search to Author Keywords (ot) and MeSH Headings (mh).

    Field logic:
        - title and abstract both True => core search uses [tiab]
        - only title True => uses [ti]
        - only abstract True => uses [ab]
        - neither title nor abstract True => no restriction (all fields)
        - keywords True => additionally OR with [ot] and [mh] versions of the query

    Returns:
        List[dict]: Each dict contains pmid, title, authors, abstract, journal,
        publication_year, publication_month, url. An empty list is returned for
        an empty query, when nothing matches, or when any error occurs.
    """
    try:
        if not isinstance(query, str) or not query.strip():
            _log("Empty query provided; returning empty result list.")
            return []
        if max_results <= 0:
            max_results = 10

        # Build refined query with field tags
        refined_query = _build_field_query(query.strip(), title, abstract, keywords)

        # Search PubMed for article IDs using Entrez.esearch
        _log(f"Searching for: {refined_query}")
        search_record = _read_and_close(
            Entrez.esearch(db="pubmed", term=refined_query, retmax=str(max_results))
        )

        pmid_list = search_record["IdList"]
        total_count = search_record["Count"]
        _log(f"Se encontraron {total_count} artículos. Los primeros {len(pmid_list)} PMIDs son: {pmid_list}")

        if not pmid_list:
            _log("No articles found for your query.")
            return []

        # Fetch detailed information for each PMID using Entrez.efetch
        records = _read_and_close(
            Entrez.efetch(db="pubmed", id=pmid_list, rettype="xml", retmode="xml")
        )

        papers_list = [_parse_article(record) for record in records['PubmedArticle']]
        _log(f"Successfully processed {len(papers_list)} articles.")
        return papers_list

    except HTTPError as http_err:
        _log(f"HTTP error during Entrez request: {http_err}")
        return []
    except Exception as e:
        # Top-level tool boundary: report the failure and return an empty
        # result instead of propagating the exception to the MCP client.
        _log(f"An error occurred: {e}")
        traceback.print_exc()
        return []


@mcp.prompt(name="precise_pubmed_query", description="Craft a precise PubMed Boolean query from a natural language information need.")
def precise_pubmed_query(information_need: str) -> str:
    """Return a brief English prompt asking a model for a single concise PubMed query.

    The prompt also instructs the model to run ``search_pubmed`` and list the
    results in a fixed one-line-per-article format.
    Advanced options are ignored; signature kept for compatibility.

    Parameters:
        information_need: Topic or information need. A blank value is replaced
            by the placeholder "(specify a topic)".
    """
    topic = information_need.strip() or "(specify a topic)"
    prompt = (
        "Generate one brief, precise PubMed query for the given topic. "
        "Then run the search (tool: search_pubmed) and return ONLY a list of articles in the following format, "
        "one per line, with no extra text and no JSON:\n\n"
        "1. Title: <article title> Author: <first author> Journal: <journal> Year: <year> URL: <url>\n"
        f"Topic: {topic}\n\n"
    )
    return prompt


if __name__ == '__main__':
    mcp.run(transport="stdio")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/emi-dm/PubMed-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.