PubMed MCP Server

by ncukondo
pubmed-api.ts (24.3 kB)
/**
 * PubMed API utility using NCBI E-utilities
 * Based on https://www.ncbi.nlm.nih.gov/books/NBK25499/
 */
import { XMLParser } from 'fast-xml-parser';
import { promises as fs } from 'fs';
import { join } from 'path';

// Global rate limiter to ensure requests across all API instances respect rate limits
class GlobalRateLimiter {
  private static instance: GlobalRateLimiter;
  private queue: Promise<void> = Promise.resolve();

  static getInstance(): GlobalRateLimiter {
    if (!GlobalRateLimiter.instance) {
      GlobalRateLimiter.instance = new GlobalRateLimiter();
    }
    return GlobalRateLimiter.instance;
  }

  async execute<T>(delayMs: number, task: () => Promise<T>): Promise<T> {
    const execution = this.queue.then(async () => {
      await new Promise(resolve => setTimeout(resolve, delayMs));
      return task();
    });
    // Update queue for next request (ignore errors to prevent queue from stopping)
    this.queue = execution.then(() => {}, () => {});
    return execution;
  }
}

export interface PubMedOptions {
  email: string;
  apiKey?: string;
  cacheDir?: string;
  cacheTTL?: number; // Cache TTL in seconds, default: 86400 (1 day)
}

export interface SearchResult {
  idList: string[];
  count: number;
  retMax: number;
  retStart: number;
}

export interface Article {
  pmid: string;
  title: string;
  authors: string[];
  abstract?: string;
  journal: string;
  pubDate: string;
  doi?: string;
  pmcId?: string;
  fullText?: string;
  hasFullText?: boolean;
}

export interface FullTextResult {
  pmid: string;
  fullText: string | null;
}

export interface PubMedAPI {
  search: (query: string, options?: SearchOptions) => Promise<SearchResult>;
  fetchArticles: (pmids: string[]) => Promise<Article[]>;
  searchAndFetch: (query: string, options?: SearchAndFetchOptions) => Promise<Article[]>;
  checkFullTextAvailability: (pmid: string) => Promise<{ hasFullText: boolean; pmcId?: string }>;
  getFullText: (pmids: string[]) => Promise<FullTextResult[]>;
}

export interface SearchOptions {
  retMax?: number;
  retStart?: number;
  sort?: 'relevance' | 'pub_date' | 'author' | 'journal';
  dateFrom?: string;
  dateTo?: string;
}

export interface SearchAndFetchOptions extends SearchOptions {
  maxResults?: number;
}

const BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';

/**
 * Initialize PubMed API client with email and optional API key
 */

// Full text availability check result
export interface FullTextAvailability {
  hasFullText: boolean;
  pmcId?: string;
}

interface CacheEntry<T> {
  data: T;
  timestamp: number;
}

interface CacheUtils {
  ensureCacheDir: () => Promise<void>;
  getCachedSummary: (pmid: string) => Promise<Article | null>;
  setCachedSummary: (pmid: string, article: Article) => Promise<void>;
  getCachedFullText: (pmid: string) => Promise<string | null>;
  setCachedFullText: (pmid: string, fullText: string) => Promise<void>;
  isCacheEntryValid: (timestamp: number) => boolean;
}

export function createPubMedAPI(options: PubMedOptions): PubMedAPI {
  const { email, apiKey, cacheDir, cacheTTL = 86400 } = options;

  const buildUrl = (tool: string, params: Record<string, string | number>) => {
    const url = new URL(`${BASE_URL}/${tool}.fcgi`);
    url.searchParams.set('email', email);
    if (apiKey) {
      url.searchParams.set('api_key', apiKey);
    }
    Object.entries(params).forEach(([key, value]) => {
      url.searchParams.set(key, String(value));
    });
    return url.toString();
  };

  // Cache utility functions
  const createCacheUtils = (): CacheUtils | null => {
    if (!cacheDir) return null;

    const summaryDir = join(cacheDir, 'summary');
    const fulltextDir = join(cacheDir, 'fulltext');

    const ensureCacheDir = async (): Promise<void> => {
      try {
        await fs.mkdir(summaryDir, { recursive: true });
        await fs.mkdir(fulltextDir, { recursive: true });
      } catch (error) {
        console.error('Error creating cache directories:', error);
        throw error;
      }
    };

    const isCacheEntryValid = (timestamp: number): boolean => {
      const now = Date.now();
      const age = (now - timestamp) / 1000; // Convert to seconds
      return age < cacheTTL;
    };

    const getCachedSummary = async (pmid: string): Promise<Article | null> => {
      try {
        const filePath = join(summaryDir, `${pmid}.json`);
        const content = await fs.readFile(filePath, 'utf8');
        const cacheEntry: CacheEntry<Article> = JSON.parse(content);
        if (isCacheEntryValid(cacheEntry.timestamp)) {
          return cacheEntry.data;
        } else {
          // Cache expired, remove the file
          await fs.unlink(filePath).catch(() => {});
          return null;
        }
      } catch (error) {
        // File doesn't exist or other error
        return null;
      }
    };

    const setCachedSummary = async (pmid: string, article: Article): Promise<void> => {
      try {
        await ensureCacheDir();
        const filePath = join(summaryDir, `${pmid}.json`);
        const cacheEntry: CacheEntry<Article> = {
          data: article,
          timestamp: Date.now()
        };
        await fs.writeFile(filePath, JSON.stringify(cacheEntry, null, 2));
      } catch (error) {
        console.error('Error writing summary cache:', error);
      }
    };

    const getCachedFullText = async (pmid: string): Promise<string | null> => {
      try {
        const filePath = join(fulltextDir, `${pmid}.md`);
        const content = await fs.readFile(filePath, 'utf8');
        const lines = content.split('\n');
        // First line should contain timestamp metadata
        const timestampMatch = lines[0].match(/^<!--\s*timestamp:\s*(\d+)\s*-->$/);
        if (!timestampMatch) {
          // Old format or corrupted file, remove it
          await fs.unlink(filePath).catch(() => {});
          return null;
        }
        const timestamp = parseInt(timestampMatch[1]);
        if (isCacheEntryValid(timestamp)) {
          // Return content without the timestamp line
          return lines.slice(1).join('\n').trim();
        } else {
          // Cache expired, remove the file
          await fs.unlink(filePath).catch(() => {});
          return null;
        }
      } catch (error) {
        // File doesn't exist or other error
        return null;
      }
    };

    const setCachedFullText = async (pmid: string, fullText: string): Promise<void> => {
      try {
        await ensureCacheDir();
        const filePath = join(fulltextDir, `${pmid}.md`);
        const timestamp = Date.now();
        const content = `<!-- timestamp: ${timestamp} -->\n${fullText}`;
        await fs.writeFile(filePath, content);
      } catch (error) {
        console.error('Error writing fulltext cache:', error);
      }
    };

    return {
      ensureCacheDir,
      getCachedSummary,
      setCachedSummary,
      getCachedFullText,
      setCachedFullText,
      isCacheEntryValid
    };
  };

  const cache = createCacheUtils();

  // Decode HTML entities to readable characters
  const decodeHtmlEntities = (text: string): string => {
    const entityMap: { [key: string]: string } = {
      '&amp;': '&',
      '&lt;': '<',
      '&gt;': '>',
      '&quot;': '"',
      '&#39;': "'",
      '&#8217;': "'", // right single quotation mark
      '&#8216;': "'", // left single quotation mark
      '&#8220;': '"', // left double quotation mark
      '&#8221;': '"', // right double quotation mark
      '&#8211;': '–', // en dash
      '&#8212;': '—', // em dash
      '&#8722;': '−', // minus sign
      '&#160;': ' ', // non-breaking space
      '&#8201;': ' ', // thin space
      '&#8804;': '≤', // less than or equal to
      '&#8805;': '≥', // greater than or equal to
      '&nbsp;': ' '
    };
    return text.replace(/&[#\w]+;/g, (entity) => {
      return entityMap[entity] || entity;
    });
  };

  // Extract structured sections from PMC article body
  type ExtractTextFn = (node: unknown) => string;

  interface SectionNode {
    title?: unknown;
    sec?: SectionNode | SectionNode[];
    [key: string]: unknown;
  }

  interface BodyNode {
    sec?: SectionNode | SectionNode[];
  }

  const extractStructuredContent = (
    bodyNode: BodyNode,
    extractTextFromNode: ExtractTextFn
  ): string => {
    if (!bodyNode.sec) {
      return '';
    }
    const sections = Array.isArray(bodyNode.sec) ? bodyNode.sec : [bodyNode.sec];
    const content = sections.flatMap(section => {
      if (!section) return [];
      // Extract section title
      const sectionTitle = section.title ? [`### ${extractTextFromNode(section.title)}`] : [];
      // Extract section content
      const sectionContent = [extractTextFromNode(section)];
      return [...sectionTitle, ...sectionContent];
    }).join('\n\n').trim();
    return content;
  };

  const makeRequest = async (url: string): Promise<any> => {
    // Rate limiting: 3 requests per second without API key, 10 with API key
    const delayMs = apiKey ? 100 : 334;
    const limiter = GlobalRateLimiter.getInstance();
    return limiter.execute(delayMs, async () => {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return response.text();
    });
  };

  // Initialize XML parser with appropriate options
  const parser = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    textNodeName: "#text",
    parseAttributeValue: true,
    trimValues: true
  });

  const search = async (query: string, options: SearchOptions = {}): Promise<SearchResult> => {
    const { retMax = 20, retStart = 0, sort = 'relevance', dateFrom, dateTo } = options;

    let searchQuery = query;
    if (dateFrom || dateTo) {
      const from = dateFrom || '1900/01/01';
      const to = dateTo || '3000/12/31';
      searchQuery += ` AND ("${from}"[Date - Publication] : "${to}"[Date - Publication])`;
    }

    const params = {
      db: 'pubmed',
      term: searchQuery,
      retmax: retMax,
      retstart: retStart,
      sort: sort,
      usehistory: 'y'
    };

    const url = buildUrl('esearch', params);
    const xmlResponse = await makeRequest(url);
    const parsedData = parser.parse(xmlResponse);

    const searchResult = parsedData.eSearchResult;
    const idList = searchResult.IdList
      ? (Array.isArray(searchResult.IdList.Id)
          ? searchResult.IdList.Id.map(String)
          : [String(searchResult.IdList.Id)])
      : [];
    const count = parseInt(String(searchResult.Count || 0));

    return { idList, count, retMax, retStart };
  };

  const fetchArticles = async (pmids: string[]): Promise<Article[]> => {
    if (pmids.length === 0) return [];

    // Check cache for existing articles if cache is enabled
    const cachedArticles: Article[] = !cache
      ? []
      : (await Promise.all(pmids.map(async (pmid) => {
          const cached = await cache.getCachedSummary(pmid);
          return cached ? [cached] : [];
        }))).flat();

    const uncachedPmids: string[] = pmids.filter(
      pmid => !cachedArticles.some(article => article.pmid === pmid)
    );

    // If all articles are cached, return them
    if (uncachedPmids.length === 0) {
      return cachedArticles;
    }

    // Fetch uncached articles from API
    const params = {
      db: 'pubmed',
      id: uncachedPmids.join(','),
      retmode: 'xml',
      rettype: 'abstract'
    };

    const url = buildUrl('efetch', params);
    const xmlResponse = await makeRequest(url);
    const parsedData = parser.parse(xmlResponse);

    const fetchedArticles: Article[] = [];
    const pubmedArticles = parsedData.PubmedArticleSet?.PubmedArticle || [];
    const articlesArray = Array.isArray(pubmedArticles) ? pubmedArticles : [pubmedArticles];

    for (const article of articlesArray) {
      if (!article.MedlineCitation) continue;

      const medlineCitation = article.MedlineCitation;
      const pubmedData = article.PubmedData;

      const pmid = String(medlineCitation.PMID?.['#text'] || medlineCitation.PMID || '');
      const title = medlineCitation.Article?.ArticleTitle?.['#text'] || medlineCitation.Article?.ArticleTitle || '';

      // Extract authors
      const authors: string[] = [];
      const authorList = medlineCitation.Article?.AuthorList?.Author;
      if (authorList) {
        const authorsArray = Array.isArray(authorList) ? authorList : [authorList];
        authorsArray.forEach((author: any) => {
          const lastName = author.LastName?.['#text'] || author.LastName || '';
          const foreName = author.ForeName?.['#text'] || author.ForeName || '';
          if (lastName) {
            authors.push(foreName ? `${lastName}, ${foreName}` : lastName);
          }
        });
      }

      // Extract abstract
      const abstractText = medlineCitation.Article?.Abstract?.AbstractText;
      let abstract: string | undefined = undefined;
      if (abstractText) {
        if (Array.isArray(abstractText)) {
          // Handle multiple AbstractText sections
          abstract = abstractText
            .map((section: any) => {
              const text = section?.['#text'] || section;
              return typeof text === 'string' ? text : String(text);
            })
            .filter(text => text && text.trim())
            .join(' ')
            .trim() || undefined;
        } else {
          // Handle single AbstractText
          const text = abstractText?.['#text'] || abstractText;
          abstract = typeof text === 'string' ? text : (text ? String(text) : undefined);
        }
      }

      // Extract journal
      const journalTitle = medlineCitation.Article?.Journal?.Title?.['#text'] || medlineCitation.Article?.Journal?.Title || '';

      // Extract publication date
      const pubDateObj = medlineCitation.Article?.Journal?.JournalIssue?.PubDate;
      let pubDate = '';
      if (pubDateObj) {
        const year = pubDateObj.Year?.['#text'] || pubDateObj.Year || '';
        const month = pubDateObj.Month?.['#text'] || pubDateObj.Month || '';
        const day = pubDateObj.Day?.['#text'] || pubDateObj.Day || '';
        pubDate = [year, month, day].filter(Boolean).join('-');
      }

      // Extract DOI
      const eLocationIDs = medlineCitation.ELocationID;
      let doi = undefined;
      if (eLocationIDs) {
        const locations = Array.isArray(eLocationIDs) ? eLocationIDs : [eLocationIDs];
        const doiLocation = locations.find((loc: any) => loc['@_EIdType'] === 'doi');
        doi = doiLocation ? String(doiLocation['#text'] || doiLocation) : undefined;
      }

      // Extract PMC ID
      const articleIds = pubmedData?.ArticleIdList?.ArticleId;
      let pmcId = undefined;
      if (articleIds) {
        const ids = Array.isArray(articleIds) ? articleIds : [articleIds];
        const pmcIdObj = ids.find((id: any) => id['@_IdType'] === 'pmc');
        pmcId = pmcIdObj ? String(pmcIdObj['#text'] || pmcIdObj) : undefined;
      }

      const newArticle = {
        pmid,
        title,
        authors,
        abstract,
        journal: journalTitle,
        pubDate,
        doi,
        pmcId
      };

      fetchedArticles.push(newArticle);

      // Cache the new article if cache is enabled
      if (cache) {
        // For better reliability in tests, we wait for cache operations
        try {
          await cache.setCachedSummary(pmid, newArticle);
        } catch (err) {
          console.error('Error caching article:', err);
        }
      }
    }

    // Combine cached and fetched articles, maintaining the original order
    const allArticles = [...cachedArticles, ...fetchedArticles];
    return pmids
      .map(pmid => allArticles.find(article => article.pmid === pmid))
      .filter(Boolean) as Article[];
  };

  const searchAndFetch = async (query: string, options: SearchAndFetchOptions = {}): Promise<Article[]> => {
    const { maxResults = 20, ...searchOptions } = options;
    const searchResult = await search(query, { ...searchOptions, retMax: maxResults });
    return fetchArticles(searchResult.idList);
  };

  const checkFullTextAvailability = async (pmid: string): Promise<{ hasFullText: boolean; pmcId?: string }> => {
    const params = {
      dbfrom: 'pubmed',
      db: 'pmc',
      id: pmid,
      linkname: 'pubmed_pmc'
    };

    const url = buildUrl('elink', params);

    try {
      const xmlResponse = await makeRequest(url);
      const parsedData = parser.parse(xmlResponse);

      const linkSets = parsedData.eLinkResult?.LinkSet;
      if (!linkSets) {
        return { hasFullText: false };
      }

      const linkSet = Array.isArray(linkSets) ? linkSets[0] : linkSets;
      const linkSetDbs = linkSet.LinkSetDb;
      if (!linkSetDbs) {
        return { hasFullText: false };
      }

      const linkSetDb = Array.isArray(linkSetDbs) ? linkSetDbs[0] : linkSetDbs;
      const links = linkSetDb.Link;
      if (!links) {
        return { hasFullText: false };
      }

      const linkArray = Array.isArray(links) ? links : [links];
      const pmcId = linkArray[0]?.Id;
      if (pmcId) {
        return { hasFullText: true, pmcId: String(pmcId) };
      }

      return { hasFullText: false };
    } catch (error) {
      console.error('Error checking full text availability:', error);
      return { hasFullText: false };
    }
  };

  const checkFullTextAvailabilityBatch = async (
    pmids: string[]
  ): Promise<{ [pmid: string]: { hasFullText: boolean; pmcId?: string } }> => {
    if (pmids.length === 0) return {};

    // For single PMID, use individual check for reliability
    if (pmids.length === 1) {
      const result = await checkFullTextAvailability(pmids[0]);
      return { [pmids[0]]: result };
    }

    // For multiple PMIDs, fall back to individual checks to avoid batch API parsing issues
    // The batch API doesn't reliably map PMC IDs to specific PMIDs
    const results: { [pmid: string]: { hasFullText: boolean; pmcId?: string } } = {};
    for (const pmid of pmids) {
      try {
        results[pmid] = await checkFullTextAvailability(pmid);
      } catch (error) {
        console.error(`Error checking availability for PMID ${pmid}:`, error);
        results[pmid] = { hasFullText: false };
      }
    }
    return results;
  };

  const getFullText = async (pmids: string[]): Promise<FullTextResult[]> => {
    if (pmids.length === 0) return [];

    // Check cache for existing full texts if cache is enabled
    const cachedResults: FullTextResult[] = [];
    const uncachedPmids: string[] = [];

    if (cache) {
      for (const pmid of pmids) {
        const cached = await cache.getCachedFullText(pmid);
        if (cached !== null) {
          cachedResults.push({ pmid, fullText: cached });
        } else {
          uncachedPmids.push(pmid);
        }
      }
    } else {
      uncachedPmids.push(...pmids);
    }

    // If all full texts are cached, return them
    if (uncachedPmids.length === 0) {
      return cachedResults;
    }

    // Batch check full text availability for uncached PMIDs
    const availabilityResults = await checkFullTextAvailabilityBatch(uncachedPmids);

    // Group PMIDs by their PMC IDs for batch fetching
    const pmcToPmidMap: { [pmcId: string]: string[] } = {};
    const resultsMap: { [pmid: string]: FullTextResult } = {};

    // Initialize results and group by PMC ID
    uncachedPmids.forEach(pmid => {
      const availability = availabilityResults[pmid];
      if (availability.hasFullText && availability.pmcId) {
        if (!pmcToPmidMap[availability.pmcId]) {
          pmcToPmidMap[availability.pmcId] = [];
        }
        pmcToPmidMap[availability.pmcId].push(pmid);
      } else {
        resultsMap[pmid] = { pmid, fullText: null };
      }
    });

    // Batch fetch full texts for PMC IDs
    for (const [pmcId, relatedPmids] of Object.entries(pmcToPmidMap)) {
      try {
        const params = {
          db: 'pmc',
          id: pmcId,
          retmode: 'xml'
          // Note: PMC database only supports rettype: null (empty) per NCBI documentation
        };

        const url = buildUrl('efetch', params);
        const xmlResponse = await makeRequest(url);
        const parsedData = parser.parse(xmlResponse);

        const article = parsedData['pmc-articleset']?.article || parsedData.pmc_articleset?.article || parsedData.article;

        if (article) {
          const extractTextFromNode = (node: unknown): string => {
            if (node == null) return '';
            if (typeof node === 'string') {
              return decodeHtmlEntities(node);
            }
            if (Array.isArray(node)) {
              return node
                .map(extractTextFromNode)
                .filter(text => text.length > 0)
                .join('\n\n'); // Use paragraph breaks for array elements
            }
            if (typeof node === 'object') {
              const obj = node as Record<string, unknown>;
              const textValue = obj['#text'];
              if (typeof textValue === 'string') {
                return decodeHtmlEntities(textValue);
              }
              let text = '';
              for (const value of Object.values(obj)) {
                text += extractTextFromNode(value) + ' ';
              }
              return text.trim();
            }
            return '';
          };

          let fullText = '';

          if (article.front?.['article-meta']?.['title-group']?.['article-title']) {
            const title = extractTextFromNode(article.front['article-meta']['title-group']['article-title']);
            fullText += `# ${title}\n\n`;
          }

          if (article.front?.['article-meta']?.abstract) {
            const abstract = extractTextFromNode(article.front['article-meta'].abstract);
            fullText += `## Abstract\n\n${abstract}\n\n`;
          }

          if (article.body) {
            // Try to extract structured content first
            const structuredContent = extractStructuredContent(article.body, extractTextFromNode);
            if (structuredContent) {
              fullText += `## Content\n\n${structuredContent}`;
            } else {
              // Fallback to basic text extraction
              const content = extractTextFromNode(article.body);
              fullText += `## Content\n\n${content}\n\n`;
            }
          }

          // Clean up text formatting
          fullText = fullText
            .replace(/[ \t]+/g, ' ') // Multiple spaces/tabs to single space
            .trim();

          // Assign the same full text to all related PMIDs and cache it
          for (const pmid of relatedPmids) {
            resultsMap[pmid] = { pmid, fullText: fullText || null };

            // Cache the full text if cache is enabled and fullText is not null
            if (cache && fullText) {
              try {
                await cache.setCachedFullText(pmid, fullText);
              } catch (err) {
                console.error('Error caching full text:', err);
              }
            }
          }
        } else {
          // No article found for this PMC ID
          relatedPmids.forEach(pmid => {
            resultsMap[pmid] = { pmid, fullText: null };
          });
        }
      } catch (error) {
        console.error(`Error fetching full text for PMC ID ${pmcId}:`, error);
        relatedPmids.forEach(pmid => {
          resultsMap[pmid] = { pmid, fullText: null };
        });
      }
    }

    // Combine cached and fetched results, maintaining the original order
    const allResults = [...cachedResults, ...Object.values(resultsMap)];
    return pmids
      .map(pmid => allResults.find(result => result.pmid === pmid))
      .filter(Boolean) as FullTextResult[];
  };

  return {
    search,
    fetchArticles,
    searchAndFetch,
    checkFullTextAvailability,
    getFullText
  };
}
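For orientation, a minimal usage sketch of the factory above. The import path, contact email, query string, and cache directory are placeholders, and Node 18+ is assumed so that the global fetch used by makeRequest is available.

import { createPubMedAPI } from './pubmed-api';

async function main() {
  const pubmed = createPubMedAPI({
    email: 'you@example.org',           // NCBI asks for a contact email on every E-utilities request
    apiKey: process.env.NCBI_API_KEY,   // optional; raises the rate limit handled by GlobalRateLimiter
    cacheDir: './.pubmed-cache',        // optional on-disk cache for summaries and full text
  });

  // Search PubMed and fetch metadata for the top matches
  const articles = await pubmed.searchAndFetch('machine learning AND radiology', { maxResults: 5 });
  for (const a of articles) {
    console.log(`${a.pmid}: ${a.title} (${a.journal}, ${a.pubDate})`);
  }

  // Retrieve open-access full text from PMC where a pubmed_pmc link exists
  const fullTexts = await pubmed.getFullText(articles.map(a => a.pmid));
  console.log(`${fullTexts.filter(ft => ft.fullText !== null).length} articles with full text`);
}

main().catch(console.error);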

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ncukondo/pubmed-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.