Skip to main content
Glama

actors-mcp-server

Official
by apify
MIT License
7,198
465
  • Apple
html.ts (2 kB)
import * as cheerio from 'cheerio';

/** Minimal structural view of a parsed element: only the fields `stripHtml` reads. */
interface CheerioElementLike {
    attribs: Record<string, string>;
    tagName: string;
}

/** Minimal structural view of a parsed node; `type` is compared against `'comment'`. */
interface NodeLike {
    type: string;
}

/**
 * Strips HTML and keeps only the structure.
 *
 * - Removes `<style>`, `<script>`, `<noscript>`, `<iframe>`, `<svg>`, `<canvas>` and `<math>`
 *   elements together with their content.
 * - Removes `<img>` elements whose `src` starts with `data:image/` (inlined images).
 * - Removes HTML comments, including comments placed outside the `<html>` element.
 * - Keeps only the `src`, `alt`, `id`, `class`, `title`, `name` and `data-*` attributes,
 *   plus `href` on `<a>` elements; every other attribute is dropped.
 * - Collapses whitespace runs to a single space, removes whitespace between tags and trims.
 *
 * @param html - HTML markup to clean; may be a full document or a fragment.
 * @returns The cleaned markup. When the input contains an `<html` tag (in any letter case) the
 * whole serialized document is returned; otherwise only the content of the parsed `<body>`.
 * Empty or whitespace-only input yields an empty string.
 */
export function stripHtml(html: string): string {
    // Nothing to parse: the result would be trimmed down to '' anyway.
    if (html.trim() === '') return '';

    const $ = cheerio.load(html);

    // Drop whole subtrees first so the attribute pass below does not visit nodes
    // that are about to be discarded.
    $('style, script, noscript, iframe, svg, canvas, math').remove();
    $('img[src^="data:image/"]').remove();

    // `$('*').contents()` only reaches children of elements. Comments that sit directly under
    // the document root (before `<html>` or after `</html>`) must be filtered separately,
    // otherwise they survive in the `$.html()` output.
    $('*').contents().filter((_, node) => (node as NodeLike).type === 'comment').remove();
    $.root().contents().filter((_, node) => (node as NodeLike).type === 'comment').remove();

    // Attributes that are kept on every element; `href` is handled separately (anchors only).
    const allowedAttrs = new Set(['href', 'src', 'alt', 'id', 'class', 'title', 'name']);
    $('*').each((_, element) => {
        const { attribs, tagName } = element as CheerioElementLike;
        if (!attribs) return;
        for (const attr of Object.keys(attribs)) {
            const keep = attr === 'href'
                ? tagName === 'a'
                : allowedAttrs.has(attr) || attr.startsWith('data-');
            if (!keep) $(element).removeAttr(attr);
        }
    });

    // Tag names are case-insensitive in HTML, so `<HTML>` must also count as a full document.
    const isFullDocument = html.toLowerCase().includes('<html');
    const serialized = isFullDocument ? $.html() : ($('body').html() ?? '');

    // Collapse whitespace runs first, then drop the single space left between adjacent tags.
    return serialized.replace(/\s+/g, ' ').replace(/>\s+</g, '><').trim();
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/apify/actors-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.