StatPearls MCP Server

html.ts•9.67 KiB

/**
 * HTML parsing utilities for the StatPearls MCP server
 */

import * as cheerio from 'cheerio';
import type { SearchResult, ArticleSection, ArticleContent } from '../types/index.js';
import { ParsingError } from './error.js';
import { parseStatPearlsArticle, parseStatPearlsSearchResults } from './statpearls-parser.js';
import type { StatPearlsSearchResult } from '../types/statpearls.js';

/** Cheerio document handle as returned by `cheerio.load`. */
type CheerioRoot = ReturnType<typeof cheerio.load>;

/** Base used to resolve relative links found in search results. */
const NCBI_BASE_URL = 'https://www.ncbi.nlm.nih.gov';

/** Selector for the per-section `<div>`s inside the in-page navigation container. */
const ARTICLE_SECTION_SELECTOR = '.jig-ncbiinpagenav div[id^="article-"]';

/** Headings that `extractArticleContent` may synthesize from parsed article data. */
const LEADING_HEADINGS = ['Abstract', 'Introduction', 'Objectives'] as const;
const REFERENCES_HEADING = 'References';

/**
 * Keywords matched (case-insensitively, as substrings) against section
 * headings by `filterSections`. Lower-cased once at module load.
 */
const EXCLUDED_SECTION_KEYWORDS: readonly string[] = [
  'references',
  'author information',
  'copyright',
  'disclosure',
  'article information',
  'Objectives',
  'Continuing Education Activity',
  'Enhancing Healthcare Team Outcomes',
  'Review Questions'
].map(keyword => keyword.toLowerCase());

/** Renders an unknown thrown value as a message string. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Collects article sections directly from the page markup, in document order.
 *
 * A section is kept only when it has a non-empty `<h2>` heading and no earlier
 * section used the same heading. The section content is the serialized HTML of
 * every child element except the first one.
 * NOTE(review): skipping child 0 assumes the `<h2>` is always the first child
 * of the section `<div>` - confirm against real StatPearls pages.
 */
function collectHtmlSections($: CheerioRoot): ArticleSection[] {
  const sections: ArticleSection[] = [];
  const seenHeadings = new Set<string>();

  $(ARTICLE_SECTION_SELECTOR).each((_, sectionEl) => {
    const heading = $(sectionEl).find('> h2').first().text().trim();
    if (!heading || seenHeadings.has(heading)) {
      return;
    }

    let content = '';
    $(sectionEl).children().each((childIndex, child) => {
      if (childIndex > 0) {
        content += $.html(child);
      }
    });

    seenHeadings.add(heading);
    sections.push({ heading, content, level: 2 });
  });

  return sections;
}

/**
 * Simple search-result scraper used when the comprehensive parser throws.
 * Reads each `.rslt` element: first link gives title and URL, first `<p>`
 * gives the description. Entries without a title or URL are dropped.
 */
function scrapeSearchResults(html: string): SearchResult[] {
  const $ = cheerio.load(html);
  const results: SearchResult[] = [];

  $('.rslt').each((_, element) => {
    const link = $(element).find('a').first();
    const title = link.text().trim();

    const href = link.attr('href');
    const url = href ? new URL(href, NCBI_BASE_URL).toString() : '';

    const description = $(element).find('p').first().text().trim();

    if (title && url) {
      results.push({
        title,
        url,
        description: description || 'No description available'
      });
    }
  });

  return results;
}

/**
 * Extracts search results from the NCBI StatPearls search page.
 *
 * Tries the comprehensive parser first and maps its results onto the simpler
 * `SearchResult` shape; when a result has no description, an author byline is
 * used instead if authors are present. If the comprehensive parser throws, a
 * simpler cheerio-based scrape is attempted.
 *
 * @param html The HTML content of the search page
 * @returns Array of search results
 * @throws ParsingError when both the comprehensive parser and the fallback
 *   scrape fail; the message carries the comprehensive parser's error.
 */
export function extractSearchResults(html: string): SearchResult[] {
  try {
    return parseStatPearlsSearchResults(html).map((result: StatPearlsSearchResult) => ({
      title: result.title,
      url: result.url,
      description:
        result.description ||
        (result.authors && result.authors.length > 0
          ? `By ${result.authors.join(', ')}`
          : 'No description available')
    }));
  } catch (error) {
    try {
      return scrapeSearchResults(html);
    } catch {
      // The primary failure is reported; the fallback failure is secondary.
      throw new ParsingError(`Failed to extract search results: ${errorMessage(error)}`);
    }
  }
}

/**
 * Builds article content from the comprehensive parser's output combined with
 * the sections found in the page markup.
 *
 * Order of the result: Abstract, Introduction, Objectives (each only when the
 * parser supplied it), then the remaining HTML sections in document order,
 * then References built from parsed references (when any exist).
 *
 * An HTML section is skipped only when a section with the same heading was
 * actually produced from parsed data. If the parser yields nothing for, say,
 * "Introduction", the HTML "Introduction" section is kept rather than lost.
 */
function buildContentFromParser(html: string, url: string): ArticleContent {
  const parsed = parseStatPearlsArticle(html, url);
  const htmlSections = collectHtmlSections(cheerio.load(html));

  const parsedByHeading = new Map<string, ArticleSection>();

  if (parsed.abstract) {
    parsedByHeading.set('Abstract', {
      heading: 'Abstract',
      content: parsed.abstract,
      level: 2
    });
  }

  if (parsed.introduction) {
    parsedByHeading.set('Introduction', {
      heading: 'Introduction',
      content: parsed.introduction,
      level: 2
    });
  }

  if (parsed.objectives && parsed.objectives.items.length > 0) {
    const listItems = parsed.objectives.items.map(item => `<li>${item}</li>`).join('');
    parsedByHeading.set('Objectives', {
      heading: 'Objectives',
      content: `<ul>${listItems}</ul>`,
      level: 2
    });
  }

  if (parsed.references && parsed.references.length > 0) {
    const referencesHtml = parsed.references
      .map(ref => `<p id="ref-${ref.id}">${ref.number}. ${ref.text}</p>`)
      .join('');
    parsedByHeading.set(REFERENCES_HEADING, {
      heading: REFERENCES_HEADING,
      content: referencesHtml,
      level: 2
    });
  }

  const sections: ArticleSection[] = [];

  // Parsed lead sections come first, in a fixed order.
  for (const heading of LEADING_HEADINGS) {
    const section = parsedByHeading.get(heading);
    if (section) {
      sections.push(section);
    }
  }

  // Then every HTML section that was not already supplied by parsed data.
  for (const section of htmlSections) {
    if (!parsedByHeading.has(section.heading)) {
      sections.push(section);
    }
  }

  // Parsed references always go last.
  const references = parsedByHeading.get(REFERENCES_HEADING);
  if (references) {
    sections.push(references);
  }

  return {
    title: parsed.title,
    url: parsed.url,
    sections
  };
}

/**
 * Extracts the content of a StatPearls article.
 *
 * Uses the comprehensive parser when it succeeds. If it throws, the failure is
 * logged with `console.error` and the sections are scraped straight from the
 * markup, with the title taken from `.content-title`, then the first `<h1>`,
 * then the literal 'Untitled Article'.
 *
 * @param html The HTML content of the article
 * @param url The URL of the article
 * @returns Structured article content conforming to the ArticleContent interface
 * @throws ParsingError when the fallback extraction itself fails
 */
export function extractArticleContent(html: string, url: string): ArticleContent {
  try {
    try {
      return buildContentFromParser(html, url);
    } catch (parserError) {
      console.error("Comprehensive parser failed, falling back to direct extraction:", parserError);

      const $ = cheerio.load(html);
      const title =
        $('.content-title').text().trim() ||
        $('h1').first().text().trim() ||
        'Untitled Article';

      return {
        title,
        url,
        sections: collectHtmlSections($)
      };
    }
  } catch (error) {
    throw new ParsingError(`Failed to extract article content: ${errorMessage(error)}`);
  }
}

/**
 * Filters out unwanted sections from the article content.
 *
 * A section is removed when its heading, lower-cased, contains any of the
 * excluded keywords as a substring. The input object is not mutated.
 *
 * @param content The article content to filter
 * @returns A copy of the article content with the excluded sections removed
 */
export function filterSections(content: ArticleContent): ArticleContent {
  const sections = content.sections.filter(section => {
    const heading = section.heading.toLowerCase();
    return !EXCLUDED_SECTION_KEYWORDS.some(keyword => heading.includes(keyword));
  });

  return {
    ...content,
    sections
  };
}

/**
 * Extracts tables from HTML content.
 *
 * @param html The HTML content containing tables
 * @returns The serialized HTML of every `<table>` element, in document order
 * @throws ParsingError when the HTML cannot be loaded or serialized
 */
export function extractTables(html: string): string[] {
  try {
    const $ = cheerio.load(html);
    const tables: string[] = [];

    $('table').each((_, element) => {
      tables.push($.html(element));
    });

    return tables;
  } catch (error) {
    throw new ParsingError(`Failed to extract tables: ${errorMessage(error)}`);
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jpoles1/statpearls-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

html.ts•9.67 KiB