PubMed MCP Server

logic.ts•13.5 KiB

/**
 * @fileoverview Logic for the pubmed_fetch_contents MCP tool.
 * Handles EFetch queries for specific PMIDs and formats the results.
 * This tool can fetch various details from PubMed including abstracts, full XML,
 * MEDLINE text, and citation data.
 * @module src/mcp-server/tools/pubmedFetchContents/logic
 */

import { z } from "zod";
import { getNcbiService } from "../../../services/NCBI/core/ncbiService.js";
import { BaseErrorCode, McpError } from "../../../types-global/errors.js";
import {
  ParsedArticle,
  XmlMedlineCitation,
  XmlPubmedArticleSet,
} from "../../../types-global/pubmedXml.js";
import {
  logger,
  RequestContext,
  requestContextService,
  sanitizeInputForLogging,
} from "../../../utils/index.js";
import {
  ensureArray,
  extractAbstractText,
  extractArticleDates,
  extractAuthors,
  extractDoi,
  extractGrants,
  extractJournalInfo,
  extractKeywords,
  extractMeshTerms,
  extractPmid,
  extractPublicationTypes,
  getText,
} from "../../../services/NCBI/parsing/index.js";

/**
 * Zod schema for the pubmed_fetch_contents tool input.
 *
 * Records are selected either by an explicit `pmids` list or by an ESearch
 * history pair (`queryKey` + `webEnv`); the `superRefine` step rejects inputs
 * that supply neither, both, or only half of the history pair, and rejects
 * `retstart`/`retmax` when no history pair is given.
 */
export const PubMedFetchContentsInputSchema = z
  .object({
    pmids: z
      .array(z.string().regex(/^\d+$/))
      .max(200, "Max 200 PMIDs per call if not using history.")
      .optional()
      .describe(
        "An array of PubMed Unique Identifiers (PMIDs) to fetch. Use this OR queryKey/webEnv.",
      ),
    queryKey: z
      .string()
      .optional()
      .describe(
        "Query key from ESearch history. Requires webEnv. Use this OR pmids.",
      ),
    webEnv: z
      .string()
      .optional()
      .describe(
        "Web environment from ESearch history. Requires queryKey. Use this OR pmids.",
      ),
    retstart: z
      .number()
      .int()
      .min(0)
      .optional()
      .describe(
        "0-based index of the first record to retrieve. Used with queryKey/webEnv.",
      ),
    retmax: z
      .number()
      .int()
      .min(1)
      .optional()
      .describe(
        "Maximum number of records to retrieve. Used with queryKey/webEnv.",
      ),
    detailLevel: z
      .enum(["abstract_plus", "full_xml", "medline_text", "citation_data"])
      .optional()
      .default("abstract_plus")
      .describe(
        "Specifies the level of detail for the fetched content. Options: 'abstract_plus' (parsed details), 'full_xml' (raw PubMedArticle XML), 'medline_text' (MEDLINE format), 'citation_data' (minimal citation data). Defaults to 'abstract_plus'.",
      ),
    includeMeshTerms: z
      .boolean()
      .optional()
      .default(true)
      .describe(
        "Include MeSH terms in 'abstract_plus' and 'citation_data' results. Default: true.",
      ),
    includeGrantInfo: z
      .boolean()
      .optional()
      .default(false)
      .describe(
        "Include grant info in 'abstract_plus' results. Default: false.",
      ),
    outputFormat: z
      .enum(["json", "raw_text"])
      .optional()
      .default("json")
      .describe(
        "Output format. 'json' (default) wraps data in a JSON object. 'raw_text' returns raw text for 'medline_text' or 'full_xml' detail levels.",
      ),
  })
  .superRefine((data, ctx) => {
    // The history pair is only usable when both halves are present.
    if (data.queryKey && !data.webEnv) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "webEnv is required if queryKey is provided.",
        path: ["webEnv"],
      });
    }
    if (!data.queryKey && data.webEnv) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "queryKey is required if webEnv is provided.",
        path: ["queryKey"],
      });
    }
    // At least one selection method must be supplied...
    if (
      (!data.pmids || data.pmids.length === 0) &&
      !(data.queryKey && data.webEnv)
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          "Either pmids (non-empty array) or both queryKey and webEnv must be provided.",
        path: ["pmids"],
      });
    }
    // ...but never both at once.
    if (data.pmids && data.pmids.length > 0 && (data.queryKey || data.webEnv)) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          "Cannot use pmids and queryKey/webEnv simultaneously. Please choose one method.",
        path: ["pmids"],
      });
    }
    // Paging parameters are only accepted alongside a history query.
    if (
      (data.retstart !== undefined || data.retmax !== undefined) &&
      !(data.queryKey && data.webEnv)
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "retstart/retmax can only be used with queryKey and webEnv.",
        path: ["retstart"],
      });
    }
  });

/** Validated (defaults applied) input of the pubmed_fetch_contents tool. */
export type PubMedFetchContentsInput = z.infer<
  typeof PubMedFetchContentsInputSchema
>;

/** Result returned by {@link pubMedFetchContentsLogic}. */
export type PubMedFetchContentsOutput = {
  /** Serialized payload: a JSON string, or raw MEDLINE/XML text for `raw_text`. */
  content: string;
  /** Number of articles detected in the EFetch response. */
  articlesReturned: number;
  /** EFetch URL reconstructed from the request parameters. */
  eFetchUrl: string;
};

/** Query parameters assembled for the EFetch call. */
interface EFetchServiceParams {
  db: string;
  id?: string;
  query_key?: string;
  WebEnv?: string;
  retmode?: "xml" | "text";
  rettype?: string;
  retstart?: string;
  retmax?: string;
  [key: string]: string | undefined;
}

/**
 * Builds a short, never-throwing textual preview of an arbitrary value for
 * error context. `JSON.stringify` returns `undefined` for `undefined` and
 * functions and throws on circular structures, so both cases fall back to
 * `String(value)`.
 */
function previewForLog(value: unknown, maxLength = 200): string {
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(value);
  } catch {
    serialized = undefined;
  }
  return (serialized ?? String(value)).substring(0, maxLength);
}

/** Returns the requested PMIDs that are absent from `found` (empty for history queries). */
function findMissingPmids(
  requested: readonly string[] | undefined,
  found: { has(pmid: string): boolean },
): string[] {
  return requested?.filter((pmid) => !found.has(pmid)) ?? [];
}

/** Serializes the JSON envelope shared by every `json` output variant. */
function buildJsonResult(
  requestedPmids: readonly string[] | undefined,
  articles: unknown[],
  notFoundPmids: string[],
  eFetchUrl: string,
): string {
  return JSON.stringify({
    requestedPmids: requestedPmids ?? "N/A (history query)",
    articles,
    notFoundPmids,
    eFetchDetails: { urls: [eFetchUrl] },
  });
}

/** Counts `<PubmedArticle>` opening tags in raw XML text (`<PubmedArticleSet>` is not matched). */
function countPubmedArticlesInRawXml(rawXml: string): number {
  return (rawXml.match(/<PubmedArticle[\s>]/g) ?? []).length;
}

/**
 * Converts a parsed EFetch XML response into `ParsedArticle` records.
 *
 * Entries without a `MedlineCitation` or without an extractable PMID are
 * skipped. MeSH terms and grant information are attached only when the
 * corresponding input flags are set.
 *
 * @param xmlData - Parsed EFetch response; must be an object containing a `PubmedArticleSet` key.
 * @param input - Tool input; only `includeMeshTerms` and `includeGrantInfo` are read here.
 * @param parentContext - Request context used to derive a child context for logging.
 * @returns The parsed articles; empty when the set contains no `PubmedArticle`.
 * @throws {McpError} With `PARSING_ERROR` when `xmlData` does not have the expected top-level shape.
 */
function parsePubMedArticleSet(
  xmlData: unknown,
  input: PubMedFetchContentsInput,
  parentContext: RequestContext,
): ParsedArticle[] {
  const articles: ParsedArticle[] = [];
  const operationContext = requestContextService.createRequestContext({
    parentRequestId: parentContext.requestId,
    operation: "parsePubMedArticleSet",
  });

  if (
    !xmlData ||
    typeof xmlData !== "object" ||
    !("PubmedArticleSet" in xmlData)
  ) {
    throw new McpError(
      BaseErrorCode.PARSING_ERROR,
      "Invalid or unexpected structure for xmlData in parsePubMedArticleSet.",
      {
        ...operationContext,
        xmlDataType: typeof xmlData,
        // previewForLog cannot throw, so the McpError is always what surfaces.
        xmlDataPreview: sanitizeInputForLogging(previewForLog(xmlData)),
      },
    );
  }

  const typedXmlData = xmlData as { PubmedArticleSet?: XmlPubmedArticleSet };
  const articleSet = typedXmlData.PubmedArticleSet;

  if (!articleSet || !articleSet.PubmedArticle) {
    logger.warning(
      "PubmedArticleSet or PubmedArticle array not found in EFetch XML response.",
      operationContext,
    );
    return articles;
  }

  const pubmedArticlesXml = ensureArray(articleSet.PubmedArticle);

  for (const articleXml of pubmedArticlesXml) {
    if (!articleXml || typeof articleXml !== "object") continue;

    const medlineCitation: XmlMedlineCitation | undefined =
      articleXml.MedlineCitation;
    if (!medlineCitation) continue;

    const pmid = extractPmid(medlineCitation);
    if (!pmid) continue;

    const articleNode = medlineCitation.Article;
    const parsedArticle: ParsedArticle = {
      pmid,
      title: articleNode?.ArticleTitle
        ? getText(articleNode.ArticleTitle)
        : undefined,
      abstractText: articleNode?.Abstract
        ? extractAbstractText(articleNode.Abstract)
        : undefined,
      authors: articleNode?.AuthorList
        ? extractAuthors(articleNode.AuthorList)
        : undefined,
      journalInfo: articleNode?.Journal
        ? extractJournalInfo(articleNode.Journal, medlineCitation)
        : undefined,
      publicationTypes: articleNode?.PublicationTypeList
        ? extractPublicationTypes(articleNode.PublicationTypeList)
        : undefined,
      keywords: articleNode?.KeywordList
        ? extractKeywords(articleNode.KeywordList)
        : undefined,
      doi: articleNode ? extractDoi(articleNode) : undefined,
      articleDates: articleNode?.ArticleDate
        ? extractArticleDates(articleNode)
        : undefined,
    };

    if (input.includeMeshTerms) {
      parsedArticle.meshTerms = medlineCitation.MeshHeadingList
        ? extractMeshTerms(medlineCitation.MeshHeadingList)
        : undefined;
    }

    if (input.includeGrantInfo) {
      parsedArticle.grantList = articleNode?.GrantList
        ? extractGrants(articleNode.GrantList)
        : undefined;
    }

    articles.push(parsedArticle);
  }

  return articles;
}

/**
 * Executes the pubmed_fetch_contents tool: validates the input, issues an
 * EFetch request through the NCBI service, and formats the response according
 * to `detailLevel` and `outputFormat`.
 *
 * @param input - Tool input. It is re-validated here and the validated copy
 *   (with schema defaults applied) drives all subsequent logic.
 * @param parentRequestContext - Context of the calling request, used for logging/tracing.
 * @returns The serialized content, the number of articles found, and the EFetch URL.
 * @throws {McpError} With `VALIDATION_ERROR` when the input fails schema validation,
 *   or `PARSING_ERROR` when the XML response has an unexpected structure.
 */
export async function pubMedFetchContentsLogic(
  input: PubMedFetchContentsInput,
  parentRequestContext: RequestContext,
): Promise<PubMedFetchContentsOutput> {
  const toolLogicContext = requestContextService.createRequestContext({
    parentRequestId: parentRequestContext.requestId,
    operation: "pubMedFetchContentsLogic",
    input: sanitizeInputForLogging(input),
  });

  const validationResult = PubMedFetchContentsInputSchema.safeParse(input);
  if (!validationResult.success) {
    throw new McpError(
      BaseErrorCode.VALIDATION_ERROR,
      validationResult.error.errors[0]?.message || "Invalid input",
      { ...toolLogicContext, details: validationResult.error.flatten() },
    );
  }
  // Use the validated copy so schema defaults (detailLevel, includeMeshTerms,
  // ...) are honoured even if the caller passed un-parsed input.
  const params = validationResult.data;

  const ncbiService = getNcbiService();
  logger.info("Executing pubmed_fetch_contents tool", toolLogicContext);

  const eFetchParams: EFetchServiceParams = { db: "pubmed" };

  if (params.queryKey && params.webEnv) {
    eFetchParams.query_key = params.queryKey;
    eFetchParams.WebEnv = params.webEnv;
    if (params.retstart !== undefined)
      eFetchParams.retstart = String(params.retstart);
    if (params.retmax !== undefined)
      eFetchParams.retmax = String(params.retmax);
  } else if (params.pmids && params.pmids.length > 0) {
    eFetchParams.id = params.pmids.join(",");
  }

  // Only MEDLINE output is fetched as text; every other detail level uses XML.
  let serviceRetmode: "xml" | "text" = "xml";
  let rettype: string | undefined;

  switch (params.detailLevel) {
    case "full_xml":
      serviceRetmode = "xml";
      break;
    case "medline_text":
      serviceRetmode = "text";
      rettype = "medline";
      break;
    case "abstract_plus":
    case "citation_data":
      serviceRetmode = "xml";
      break;
  }
  eFetchParams.retmode = serviceRetmode;
  if (rettype) eFetchParams.rettype = rettype;

  // The URL is rebuilt from the parameters for reporting purposes; the actual
  // request is performed by the NCBI service below.
  const eFetchBase =
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";
  const eFetchQueryString = new URLSearchParams(
    eFetchParams as Record<string, string>,
  ).toString();
  const eFetchUrl = `${eFetchBase}?${eFetchQueryString}`;

  const shouldReturnRawXml =
    params.detailLevel === "full_xml" && params.outputFormat === "raw_text";

  const eFetchResponseData = await ncbiService.eFetch(
    eFetchParams,
    toolLogicContext,
    { retmode: serviceRetmode, rettype, returnRawXml: shouldReturnRawXml },
  );

  let finalOutputText: string;
  let articlesCount = 0;

  if (params.detailLevel === "medline_text") {
    const medlineText = String(eFetchResponseData);

    // Each MEDLINE record starts with a "PMID- <digits>" line.
    const foundPmidsInMedline = new Set<string>();
    const pmidRegex = /^PMID- (\d+)/gm;
    let match: RegExpExecArray | null;
    while ((match = pmidRegex.exec(medlineText)) !== null) {
      if (match[1]) {
        foundPmidsInMedline.add(match[1]);
      }
    }
    articlesCount = foundPmidsInMedline.size;

    if (params.outputFormat === "raw_text") {
      finalOutputText = medlineText;
    } else {
      finalOutputText = buildJsonResult(
        params.pmids,
        [{ medlineText }],
        findMissingPmids(params.pmids, foundPmidsInMedline),
        eFetchUrl,
      );
    }
  } else if (params.detailLevel === "full_xml") {
    const articlesXml = ensureArray(
      (eFetchResponseData as { PubmedArticleSet?: XmlPubmedArticleSet })
        ?.PubmedArticleSet?.PubmedArticle || [],
    );

    // NOTE(review): with returnRawXml the service presumably returns the XML
    // as a string, which has no parsed PubmedArticleSet to count from. Count
    // opening tags in that case so articlesReturned is not stuck at 0.
    articlesCount =
      typeof eFetchResponseData === "string"
        ? countPubmedArticlesInRawXml(eFetchResponseData)
        : articlesXml.length;

    if (params.outputFormat === "raw_text") {
      finalOutputText = String(eFetchResponseData);
    } else {
      const foundPmidsInXml = new Set<string>();
      const articlesPayload = articlesXml.map((articleXml) => {
        const pmid = extractPmid(articleXml.MedlineCitation) || "unknown_pmid";
        if (pmid !== "unknown_pmid") foundPmidsInXml.add(pmid);
        return { pmid, fullXmlContent: articleXml };
      });
      finalOutputText = buildJsonResult(
        params.pmids,
        articlesPayload,
        findMissingPmids(params.pmids, foundPmidsInXml),
        eFetchUrl,
      );
    }
  } else {
    // 'abstract_plus' and 'citation_data' both start from the parsed articles.
    const parsedArticles = parsePubMedArticleSet(
      eFetchResponseData,
      params,
      toolLogicContext,
    );
    articlesCount = parsedArticles.length;

    const foundPmids = new Set(parsedArticles.map((p) => p.pmid));

    let articlesToReturn: ParsedArticle[] | Record<string, unknown>[] =
      parsedArticles;
    if (params.detailLevel === "citation_data") {
      // Reduce each article to the minimal citation fields.
      articlesToReturn = parsedArticles.map((article) => ({
        pmid: article.pmid,
        title: article.title,
        authors: article.authors?.map((a) => ({
          lastName: a.lastName,
          initials: a.initials,
        })),
        journalInfo: {
          title: article.journalInfo?.title,
          isoAbbreviation: article.journalInfo?.isoAbbreviation,
          volume: article.journalInfo?.volume,
          issue: article.journalInfo?.issue,
          pages: article.journalInfo?.pages,
          year: article.journalInfo?.publicationDate?.year,
        },
        doi: article.doi,
        ...(params.includeMeshTerms && { meshTerms: article.meshTerms }),
      }));
    }

    finalOutputText = buildJsonResult(
      params.pmids,
      articlesToReturn,
      findMissingPmids(params.pmids, foundPmids),
      eFetchUrl,
    );
  }

  logger.notice("Successfully executed pubmed_fetch_contents tool.", {
    ...toolLogicContext,
    articlesReturned: articlesCount,
  });

  return {
    content: finalOutputText,
    articlesReturned: articlesCount,
    eFetchUrl,
  };
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cyanheads/pubmed-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

logic.ts•13.5 KiB