Skip to main content
Glama
logic.ts•13.8 kB
/**
 * @fileoverview Logic for the pubmed_fetch_contents MCP tool.
 * Handles EFetch queries for specific PMIDs and formats the results.
 * This tool can fetch various details from PubMed including abstracts, full XML,
 * MEDLINE text, and citation data.
 * @module src/mcp-server/tools/pubmedFetchContents/logic
 */
import { z } from "zod";
import { getNcbiService } from "../../../services/NCBI/core/ncbiService.js";
import { BaseErrorCode, McpError } from "../../../types-global/errors.js";
import {
  ParsedArticle,
  XmlMedlineCitation,
  XmlPubmedArticleSet,
} from "../../../types-global/pubmedXml.js";
import {
  logger,
  RequestContext,
  requestContextService,
  sanitizeInputForLogging,
} from "../../../utils/index.js";
import {
  ensureArray,
  extractAbstractText,
  extractArticleDates,
  extractAuthors,
  extractDoi,
  extractGrants,
  extractJournalInfo,
  extractKeywords,
  extractMeshTerms,
  extractPmid,
  extractPublicationTypes,
  getText,
} from "../../../services/NCBI/parsing/index.js";

/**
 * Zod schema for the pubmed_fetch_contents tool input.
 *
 * Records are selected either by an explicit `pmids` list or by an ESearch
 * history reference (`queryKey` + `webEnv`); the two modes are mutually
 * exclusive, and `retstart`/`retmax` are only accepted in history mode.
 * These cross-field rules are enforced in `superRefine`.
 */
export const PubMedFetchContentsInputSchema = z
  .object({
    pmids: z
      .array(z.string().regex(/^\d+$/))
      .max(200, "Max 200 PMIDs per call if not using history.")
      .optional()
      .describe(
        "An array of PubMed Unique Identifiers (PMIDs) to fetch. Use this OR queryKey/webEnv.",
      ),
    queryKey: z
      .string()
      .optional()
      .describe(
        "Query key from ESearch history. Requires webEnv. Use this OR pmids.",
      ),
    webEnv: z
      .string()
      .optional()
      .describe(
        "Web environment from ESearch history. Requires queryKey. Use this OR pmids.",
      ),
    retstart: z
      .number()
      .int()
      .min(0)
      .optional()
      .describe(
        "0-based index of the first record to retrieve. Used with queryKey/webEnv.",
      ),
    retmax: z
      .number()
      .int()
      .min(1)
      .optional()
      .describe(
        "Maximum number of records to retrieve. Used with queryKey/webEnv.",
      ),
    detailLevel: z
      .enum(["abstract_plus", "full_xml", "medline_text", "citation_data"])
      .optional()
      .default("abstract_plus")
      .describe(
        "Specifies the level of detail for the fetched content. Options: 'abstract_plus' (parsed details), 'full_xml' (raw PubMedArticle XML), 'medline_text' (MEDLINE format), 'citation_data' (minimal citation data). Defaults to 'abstract_plus'.",
      ),
    includeMeshTerms: z
      .boolean()
      .optional()
      .default(true)
      .describe(
        "Include MeSH terms in 'abstract_plus' and 'citation_data' results. Default: true.",
      ),
    includeGrantInfo: z
      .boolean()
      .optional()
      .default(false)
      .describe(
        "Include grant info in 'abstract_plus' results. Default: false.",
      ),
    outputFormat: z
      .enum(["json", "raw_text"])
      .optional()
      .default("json")
      .describe(
        "Output format. 'json' (default) wraps data in a JSON object. 'raw_text' returns raw text for 'medline_text' or 'full_xml' detail levels.",
      ),
  })
  .superRefine((data, ctx) => {
    // queryKey and webEnv are only meaningful as a pair.
    if (data.queryKey && !data.webEnv) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "webEnv is required if queryKey is provided.",
        path: ["webEnv"],
      });
    }
    if (!data.queryKey && data.webEnv) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "queryKey is required if webEnv is provided.",
        path: ["queryKey"],
      });
    }
    // At least one selection mode must be present...
    if (
      (!data.pmids || data.pmids.length === 0) &&
      !(data.queryKey && data.webEnv)
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          "Either pmids (non-empty array) or both queryKey and webEnv must be provided.",
        path: ["pmids"],
      });
    }
    // ...but never both at once.
    if (data.pmids && data.pmids.length > 0 && (data.queryKey || data.webEnv)) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          "Cannot use pmids and queryKey/webEnv simultaneously. Please choose one method.",
        path: ["pmids"],
      });
    }
    // Paging parameters are restricted to history mode.
    if (
      (data.retstart !== undefined || data.retmax !== undefined) &&
      !(data.queryKey && data.webEnv)
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "retstart/retmax can only be used with queryKey and webEnv.",
        path: ["retstart"],
      });
    }
  });

/** Validated input of the tool (schema defaults applied). */
export type PubMedFetchContentsInput = z.infer<
  typeof PubMedFetchContentsInputSchema
>;

/** Result returned by {@link pubMedFetchContentsLogic}. */
export type PubMedFetchContentsOutput = {
  /** Serialized payload: a JSON string, or raw MEDLINE/XML text for `raw_text`. */
  content: string;
  /** Number of articles detected in the EFetch response. */
  articlesReturned: number;
  /** EFetch URL reconstructed from the request parameters built here. */
  eFetchUrl: string;
};

/** Query parameters handed to the NCBI service's `eFetch` call. */
interface EFetchServiceParams {
  db: string;
  id?: string;
  query_key?: string;
  WebEnv?: string;
  retmode?: "xml" | "text";
  rettype?: string;
  retstart?: string;
  retmax?: string;
  [key: string]: string | undefined;
}

/**
 * Produces a short, never-throwing textual preview of an arbitrary value.
 *
 * `JSON.stringify` returns `undefined` for `undefined`, functions and symbols,
 * and throws for BigInt or circular structures; in all of those cases the
 * preview falls back to `String(value)`.
 */
function previewForLog(value: unknown, maxLength = 200): string {
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(value);
  } catch {
    serialized = undefined;
  }
  return (serialized ?? String(value)).substring(0, maxLength);
}

/** Returns the requested PMIDs that are absent from `found` (empty when none were requested). */
function findMissingPmids(
  requested: string[] | undefined,
  found: ReadonlySet<string>,
): string[] {
  return requested?.filter((pmid) => !found.has(pmid)) ?? [];
}

/** Serializes the JSON envelope shared by every `json` output variant. */
function buildJsonOutput(
  requestedPmids: string[] | undefined,
  articles: unknown[],
  notFoundPmids: string[],
  eFetchUrl: string,
): string {
  return JSON.stringify({
    requestedPmids: requestedPmids || "N/A (history query)",
    articles,
    notFoundPmids,
    eFetchDetails: { urls: [eFetchUrl] },
  });
}

/**
 * Counts `<PubmedArticle>` opening tags in a raw XML string.
 * The trailing `[\s>]` keeps `<PubmedArticleSet>` from being counted.
 */
function countArticlesInRawXml(rawXml: string): number {
  return rawXml.match(/<PubmedArticle[\s>]/g)?.length ?? 0;
}

/**
 * Converts a parsed EFetch response into `ParsedArticle` records.
 *
 * Entries without a `MedlineCitation` or without an extractable PMID are
 * skipped. MeSH terms and grant info are attached only when the
 * corresponding input flags are set.
 *
 * @param xmlData - Parsed EFetch response; must be an object with a `PubmedArticleSet` key.
 * @param input - Tool input; `includeMeshTerms` and `includeGrantInfo` are read.
 * @param parentContext - Request context the operation context is derived from.
 * @returns The parsed articles; empty when the set contains no `PubmedArticle`.
 * @throws {McpError} `PARSING_ERROR` when `xmlData` is not an object carrying `PubmedArticleSet`.
 */
function parsePubMedArticleSet(
  xmlData: unknown,
  input: PubMedFetchContentsInput,
  parentContext: RequestContext,
): ParsedArticle[] {
  const articles: ParsedArticle[] = [];
  const operationContext = requestContextService.createRequestContext({
    parentRequestId: parentContext.requestId,
    operation: "parsePubMedArticleSet",
  });

  if (
    !xmlData ||
    typeof xmlData !== "object" ||
    !("PubmedArticleSet" in xmlData)
  ) {
    throw new McpError(
      BaseErrorCode.PARSING_ERROR,
      "Invalid or unexpected structure for xmlData in parsePubMedArticleSet.",
      {
        ...operationContext,
        xmlDataType: typeof xmlData,
        // previewForLog cannot throw, so this branch always surfaces the McpError.
        xmlDataPreview: sanitizeInputForLogging(previewForLog(xmlData)),
      },
    );
  }

  const typedXmlData = xmlData as { PubmedArticleSet?: XmlPubmedArticleSet };
  const articleSet = typedXmlData.PubmedArticleSet;

  if (!articleSet || !articleSet.PubmedArticle) {
    logger.warning(
      "PubmedArticleSet or PubmedArticle array not found in EFetch XML response.",
      operationContext,
    );
    return articles;
  }

  const pubmedArticlesXml = ensureArray(articleSet.PubmedArticle);

  for (const articleXml of pubmedArticlesXml) {
    if (!articleXml || typeof articleXml !== "object") continue;

    const medlineCitation: XmlMedlineCitation | undefined =
      articleXml.MedlineCitation;
    if (!medlineCitation) continue;

    const pmid = extractPmid(medlineCitation);
    if (!pmid) continue;

    const articleNode = medlineCitation.Article;
    const parsedArticle: ParsedArticle = {
      pmid: pmid,
      title: articleNode?.ArticleTitle
        ? getText(articleNode.ArticleTitle)
        : undefined,
      abstractText: articleNode?.Abstract
        ? extractAbstractText(articleNode.Abstract)
        : undefined,
      authors: articleNode?.AuthorList
        ? extractAuthors(articleNode.AuthorList)
        : undefined,
      journalInfo: articleNode?.Journal
        ? extractJournalInfo(articleNode.Journal, medlineCitation)
        : undefined,
      publicationTypes: articleNode?.PublicationTypeList
        ? extractPublicationTypes(articleNode.PublicationTypeList)
        : undefined,
      keywords: articleNode?.KeywordList
        ? extractKeywords(articleNode.KeywordList)
        : undefined,
      doi: articleNode ? extractDoi(articleNode) : undefined,
      articleDates: articleNode?.ArticleDate
        ? extractArticleDates(articleNode)
        : undefined,
    };

    if (input.includeMeshTerms) {
      parsedArticle.meshTerms = medlineCitation.MeshHeadingList
        ? extractMeshTerms(medlineCitation.MeshHeadingList)
        : undefined;
    }

    if (input.includeGrantInfo) {
      parsedArticle.grantList = articleNode?.GrantList
        ? extractGrants(articleNode.GrantList)
        : undefined;
    }

    articles.push(parsedArticle);
  }
  return articles;
}

/**
 * Executes the pubmed_fetch_contents tool: validates the input, issues the
 * EFetch request through the NCBI service and shapes the response according
 * to `detailLevel` and `outputFormat`.
 *
 * @param input - Tool input. It is re-validated here and the validated result
 *   (with schema defaults applied) is what drives the rest of the function.
 * @param parentRequestContext - Context of the calling request, used for log correlation.
 * @returns The serialized content, the number of articles found and the EFetch URL.
 * @throws {McpError} `VALIDATION_ERROR` when the input fails schema validation;
 *   `PARSING_ERROR` (from `parsePubMedArticleSet`) when the XML response has an
 *   unexpected shape.
 */
export async function pubMedFetchContentsLogic(
  input: PubMedFetchContentsInput,
  parentRequestContext: RequestContext,
): Promise<PubMedFetchContentsOutput> {
  const toolLogicContext = requestContextService.createRequestContext({
    parentRequestId: parentRequestContext.requestId,
    operation: "pubMedFetchContentsLogic",
    input: sanitizeInputForLogging(input),
  });

  const validationResult = PubMedFetchContentsInputSchema.safeParse(input);
  if (!validationResult.success) {
    throw new McpError(
      BaseErrorCode.VALIDATION_ERROR,
      validationResult.error.errors[0]?.message || "Invalid input",
      { ...toolLogicContext, details: validationResult.error.flatten() },
    );
  }
  // Work on the parsed data so schema defaults hold even when the caller
  // passed an object that had not gone through the schema yet.
  const params = validationResult.data;

  const ncbiService = getNcbiService();
  logger.info("Executing pubmed_fetch_contents tool", toolLogicContext);

  const eFetchParams: EFetchServiceParams = { db: "pubmed" };

  if (params.queryKey && params.webEnv) {
    eFetchParams.query_key = params.queryKey;
    eFetchParams.WebEnv = params.webEnv;
    if (params.retstart !== undefined) {
      eFetchParams.retstart = String(params.retstart);
    }
    if (params.retmax !== undefined) {
      eFetchParams.retmax = String(params.retmax);
    }
  } else if (params.pmids && params.pmids.length > 0) {
    eFetchParams.id = params.pmids.join(",");
  }

  // Only MEDLINE output is requested as text; every other detail level uses XML.
  const wantsMedline = params.detailLevel === "medline_text";
  const serviceRetmode: "xml" | "text" = wantsMedline ? "text" : "xml";
  const rettype: string | undefined = wantsMedline ? "medline" : undefined;

  eFetchParams.retmode = serviceRetmode;
  if (rettype) eFetchParams.rettype = rettype;

  const eFetchBase =
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";
  // Only keys with defined values are ever assigned above, so the cast is safe.
  const eFetchQueryString = new URLSearchParams(
    eFetchParams as Record<string, string>,
  ).toString();
  const eFetchUrl = `${eFetchBase}?${eFetchQueryString}`;

  const shouldReturnRawXml =
    params.detailLevel === "full_xml" && params.outputFormat === "raw_text";

  const eFetchResponseData = await ncbiService.eFetch(
    eFetchParams,
    toolLogicContext,
    { retmode: serviceRetmode, rettype, returnRawXml: shouldReturnRawXml },
  );

  let finalOutputText: string;
  let articlesCount = 0;

  if (params.detailLevel === "medline_text") {
    const medlineText = String(eFetchResponseData);

    // The count is derived from the distinct "PMID- <digits>" lines found.
    const foundPmidsInMedline = new Set<string>();
    const pmidRegex = /^PMID- (\d+)/gm;
    let match: RegExpExecArray | null;
    while ((match = pmidRegex.exec(medlineText)) !== null) {
      if (match[1]) {
        foundPmidsInMedline.add(match[1]);
      }
    }
    articlesCount = foundPmidsInMedline.size;

    if (params.outputFormat === "raw_text") {
      finalOutputText = medlineText;
    } else {
      finalOutputText = buildJsonOutput(
        params.pmids,
        [{ medlineText }],
        findMissingPmids(params.pmids, foundPmidsInMedline),
        eFetchUrl,
      );
    }
  } else if (params.detailLevel === "full_xml") {
    if (
      params.outputFormat === "raw_text" &&
      typeof eFetchResponseData === "string"
    ) {
      // Raw XML was requested via returnRawXml, so there is no parsed tree to
      // walk; count the article elements directly in the text.
      finalOutputText = eFetchResponseData;
      articlesCount = countArticlesInRawXml(eFetchResponseData);
    } else {
      const articlesXml = ensureArray(
        (eFetchResponseData as { PubmedArticleSet?: XmlPubmedArticleSet })
          ?.PubmedArticleSet?.PubmedArticle || [],
      );
      articlesCount = articlesXml.length;

      if (params.outputFormat === "raw_text") {
        // NOTE(review): raw text was requested but the service returned a
        // non-string value; serialize it rather than emit "[object Object]".
        finalOutputText = previewForLog(
          eFetchResponseData,
          Number.MAX_SAFE_INTEGER,
        );
      } else {
        const foundPmidsInXml = new Set<string>();
        const articlesPayload = articlesXml.map((articleXml) => {
          const pmid =
            extractPmid(articleXml.MedlineCitation) || "unknown_pmid";
          if (pmid !== "unknown_pmid") foundPmidsInXml.add(pmid);
          return { pmid, fullXmlContent: articleXml };
        });
        finalOutputText = buildJsonOutput(
          params.pmids,
          articlesPayload,
          findMissingPmids(params.pmids, foundPmidsInXml),
          eFetchUrl,
        );
      }
    }
  } else {
    // 'abstract_plus' and 'citation_data' both start from the parsed articles.
    const parsedArticles = parsePubMedArticleSet(
      eFetchResponseData,
      params,
      toolLogicContext,
    );
    articlesCount = parsedArticles.length;

    const foundPmids = new Set(parsedArticles.map((p) => p.pmid));

    let articlesToReturn: ParsedArticle[] | Record<string, unknown>[] =
      parsedArticles;
    if (params.detailLevel === "citation_data") {
      // Reduce each article to the fields needed for a citation.
      articlesToReturn = parsedArticles.map((article) => ({
        pmid: article.pmid,
        title: article.title,
        authors: article.authors?.map((a) => ({
          lastName: a.lastName,
          initials: a.initials,
        })),
        journalInfo: {
          title: article.journalInfo?.title,
          isoAbbreviation: article.journalInfo?.isoAbbreviation,
          volume: article.journalInfo?.volume,
          issue: article.journalInfo?.issue,
          pages: article.journalInfo?.pages,
          year: article.journalInfo?.publicationDate?.year,
        },
        doi: article.doi,
        ...(params.includeMeshTerms && { meshTerms: article.meshTerms }),
      }));
    }

    finalOutputText = buildJsonOutput(
      params.pmids,
      articlesToReturn,
      findMissingPmids(params.pmids, foundPmids),
      eFetchUrl,
    );
  }

  logger.notice("Successfully executed pubmed_fetch_contents tool.", {
    ...toolLogicContext,
    articlesReturned: articlesCount,
  });

  return {
    content: finalOutputText,
    articlesReturned: articlesCount,
    eFetchUrl,
  };
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cyanheads/pubmed-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.