Skip to main content
Glama

MCP Toolkit

by zxfgds
content-extractor.ts (2.5 kB)
import { load } from 'cheerio';
import { MAIN_CONTENT_SELECTORS, NOISE_SELECTORS, JS_REQUIRED_PATTERNS } from './selectors.js';

/** The document handle returned by cheerio's `load()`. */
type CheerioRoot = ReturnType<typeof load>;

/**
 * Static helpers that pull readable text and links out of a cheerio-loaded
 * HTML document and guess whether the page needs JavaScript to render.
 */
export class ContentExtractor {
  /**
   * Extracts the page's main text content.
   *
   * Lookup order:
   *  1. the caller-supplied `selector`, if given and non-empty after trimming;
   *  2. the first entry of `MAIN_CONTENT_SELECTORS` that yields non-empty text
   *     once elements matching `NOISE_SELECTORS` are stripped;
   *  3. the whole `<body>`, with the same noise elements stripped.
   *
   * The passed-in document is never modified: noise elements are removed from
   * clones only, so later calls on the same `$` still see the full page.
   *
   * @param $ cheerio document handle.
   * @param selector optional CSS selector to try first.
   * @returns the extracted text with whitespace normalized, one non-empty
   *          line per output line; an empty string if nothing was found.
   */
  static extractMainContent($: CheerioRoot, selector?: string): string {
    const noiseSelector = NOISE_SELECTORS.join(',');
    let content = '';

    if (selector) {
      content = $(selector).text().trim();
    }

    if (!content) {
      // Try the known main-content containers in priority order.
      for (const mainSelector of MAIN_CONTENT_SELECTORS) {
        const element = $(mainSelector);
        if (element.length === 0) {
          continue;
        }

        // Strip noise from a copy so the caller's document stays intact.
        const candidate = element.clone();
        candidate.find(noiseSelector).remove();
        content = candidate.text().trim();
        if (content) {
          break;
        }
      }
    }

    // Still nothing: fall back to the whole body.
    if (!content) {
      const body = $('body').clone();
      body.find(noiseSelector).remove();
      content = body.text().trim();
    }

    return ContentExtractor.normalizeWhitespace(content);
  }

  /**
   * Collects the document's hyperlinks as `[text] absoluteUrl` strings.
   *
   * Anchors without visible text, in-page fragments (`#...`) and
   * `javascript:` pseudo-links are skipped. Hrefs that cannot be resolved to
   * an absolute URL are ignored. Duplicates are removed, keeping first-seen
   * order.
   *
   * @param $ cheerio document handle.
   * @param baseUrl optional URL of the page itself. Relative hrefs (and a
   *        relative `<base href>`) are resolved against it. When omitted, only
   *        a `<base href>` in the document can make relative links resolvable.
   * @returns de-duplicated list of formatted links.
   */
  static extractLinks($: CheerioRoot, baseUrl?: string): string[] {
    // Resolve the base once instead of querying the DOM for every anchor.
    const base = ContentExtractor.resolveBase($('base').attr('href'), baseUrl);
    const links = new Set<string>();

    $('a[href]').each((_, element) => {
      const $link = $(element);
      const rawHref = $link.attr('href');
      const href = rawHref ? rawHref.trim() : '';
      const text = $link.text().trim();

      if (!href || !text) {
        return;
      }
      if (href.startsWith('#') || href.toLowerCase().startsWith('javascript:')) {
        return;
      }

      try {
        const fullUrl = new URL(href, base).toString();
        links.add(`[${text}] ${fullUrl}`);
      } catch {
        // Deliberately best-effort: skip hrefs that are not valid URLs.
      }
    });

    return [...links];
  }

  /**
   * Heuristically decides whether a page probably needs JavaScript to render.
   *
   * Returns `true` when the URL matches any of `JS_REQUIRED_PATTERNS`, or when
   * the parsed document has fewer than 100 characters of body text, mentions
   * "loading" anywhere in the body text, or contains a `#root` / `#app`
   * element.
   *
   * @param url page URL, tested against `JS_REQUIRED_PATTERNS`.
   * @param html raw page HTML. Not read by the current implementation; kept so
   *        the signature stays compatible with existing callers.
   * @param $ cheerio document handle for the same page.
   */
  static mayNeedJavaScript(url: string, html: string, $: CheerioRoot): boolean {
    // URL-based hint.
    if (JS_REQUIRED_PATTERNS.some(pattern => pattern.test(url))) {
      return true;
    }

    // Content-based hints; the body text is computed once and reused.
    const bodyText = $('body').text();
    const hasNoContent = bodyText.trim().length < 100;
    const hasLoadingIndicator = bodyText.toLowerCase().includes('loading');
    const hasReactRoot = $('#root').length > 0 || $('#app').length > 0;

    return hasNoContent || hasLoadingIndicator || hasReactRoot;
  }

  /** Collapses runs of newlines and of spaces/tabs, trims each line and drops empty lines. */
  private static normalizeWhitespace(text: string): string {
    return text
      .replace(/[\n\r]+/g, '\n') // merge consecutive line breaks
      .replace(/[ \t]+/g, ' ') // merge consecutive spaces and tabs
      .split('\n')
      .map(line => line.trim())
      .filter(Boolean) // drop empty lines
      .join('\n');
  }

  /**
   * Computes the absolute base URL used to resolve links: the `<base href>`
   * resolved against `baseUrl` when possible, otherwise `baseUrl` itself.
   * Returns `undefined` when no usable absolute base exists, so that absolute
   * links can still be parsed without a base.
   */
  private static resolveBase(baseHref: string | undefined, baseUrl: string | undefined): string | undefined {
    let fallback: string | undefined;
    if (baseUrl) {
      try {
        fallback = new URL(baseUrl).toString();
      } catch {
        fallback = undefined; // an unusable baseUrl must not poison every link
      }
    }

    if (!baseHref) {
      return fallback;
    }

    try {
      return new URL(baseHref, fallback).toString();
    } catch {
      return fallback;
    }
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/zxfgds/mcp-toolkit'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.