Skip to main content
Glama
contentFetcher.js (2.22 kB)
import puppeteer from "puppeteer";
import logger from "../core/logger.js";

/** Options passed to `puppeteer.launch` for the shared browser instance. */
const LAUNCH_OPTIONS = Object.freeze({
  headless: "new",
  args: ["--no-sandbox", "--disable-setuid-sandbox"],
});

/** Desktop Chrome user agent sent with every page to reduce bot detection. */
const USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

/** Resource types aborted during navigation; only page text is extracted. */
const BLOCKED_RESOURCE_TYPES = new Set(["image", "stylesheet", "font", "media"]);

/** Elements stripped from the DOM before the text is read. */
const CLUTTER_SELECTORS = [
  "nav",
  "footer",
  "header",
  "aside",
  ".ads",
  ".advertisement",
  "script",
  "style",
];

/** Navigation timeout handed to `page.goto`, in milliseconds. */
const NAVIGATION_TIMEOUT_MS = 15000;

/** Maximum number of characters of extracted text that is returned. */
const MAX_CONTENT_LENGTH = 10000;

/**
 * Fetches the readable text of web pages through a lazily launched,
 * shared headless browser.
 */
export class ContentFetcher {
  constructor() {
    /** Live browser instance, or `null` when none is running. */
    this.browser = null;
    /** In-flight launch, shared so concurrent callers start only one browser. */
    this.launchPromise = null;
  }

  /**
   * Ensures a browser is running, launching one if necessary.
   *
   * Concurrent callers share a single launch. If the launch fails the
   * rejection is propagated and the next call retries from scratch.
   *
   * @returns {Promise<void>}
   */
  async initBrowser() {
    if (this.browser) {
      return;
    }
    if (!this.launchPromise) {
      this.launchPromise = this.launchBrowser().finally(() => {
        this.launchPromise = null;
      });
    }
    await this.launchPromise;
  }

  /**
   * Launches the browser and stores it on `this.browser`.
   * Internal helper for `initBrowser`; call that instead.
   *
   * @returns {Promise<void>}
   */
  async launchBrowser() {
    const browser = await puppeteer.launch(LAUNCH_OPTIONS);
    // Forget a browser that crashed or was closed so the next fetch relaunches
    // instead of failing forever against a dead instance.
    browser.on("disconnected", () => {
      if (this.browser === browser) {
        this.browser = null;
      }
    });
    this.browser = browser;
  }

  /**
   * Loads `url` and returns its title and whitespace-collapsed body text.
   *
   * Failures after the page has been opened are logged and reported through
   * the returned object rather than thrown. Failures to launch the browser
   * or to open a page still reject.
   *
   * NOTE(review): `url` is handed to `page.goto` without validation; if it can
   * come from untrusted input, restrict it to http(s) at the call site.
   *
   * @param {string} url - Address of the page to fetch.
   * @returns {Promise<
   *   { url: string, title: string, content: string, scrapedAt: string } |
   *   { url: string, error: string, content: null }
   * >} Scrape result on success, error descriptor on failure.
   */
  async fetchContent(url) {
    await this.initBrowser();
    const page = await this.browser.newPage();

    try {
      // Set user agent to avoid bot detection
      await page.setUserAgent(USER_AGENT);

      // Block resources to speed up loading
      await page.setRequestInterception(true);
      page.on("request", (req) => {
        const resolution = BLOCKED_RESOURCE_TYPES.has(req.resourceType())
          ? req.abort()
          : req.continue();
        // The page is closed right after DOMContentLoaded while sub-resource
        // requests may still be in flight; resolving those rejects. Ignoring
        // the rejection is deliberate: the request is moot once the page is
        // gone, and an unhandled rejection would take the process down.
        resolution.catch(() => {});
      });

      await page.goto(url, {
        waitUntil: "domcontentloaded",
        timeout: NAVIGATION_TIMEOUT_MS,
      });

      // Extract main content
      const content = await page.evaluate((selectors) => {
        // Remove clutter
        selectors.forEach((sel) => {
          document.querySelectorAll(sel).forEach((el) => el.remove());
        });
        // Documents without a <body> yield empty text instead of throwing.
        return document.body ? document.body.innerText : "";
      }, CLUTTER_SELECTORS);

      const title = await page.title();

      return {
        url,
        title,
        content: content.replace(/\s+/g, " ").trim().substring(0, MAX_CONTENT_LENGTH), // Limit size
        scrapedAt: new Date().toISOString(),
      };
    } catch (error) {
      logger.error(`Scraping failed for ${url}: ${error.message}`);
      return {
        url,
        error: error.message,
        content: null,
      };
    } finally {
      // A failing close must not replace the result (or error object) that is
      // already on its way back to the caller.
      try {
        await page.close();
      } catch (closeError) {
        logger.error(`Failed to close page for ${url}: ${closeError.message}`);
      }
    }
  }

  /**
   * Shuts the browser down if one is running or currently being launched.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (this.launchPromise) {
      // Wait for an in-flight launch so it is not orphaned. A launch failure is
      // already reported to the initBrowser caller, so it is ignored here.
      await this.launchPromise.catch(() => {});
    }
    const browser = this.browser;
    if (browser) {
      // Clear the reference first so a failing close cannot leave a half-closed
      // browser cached for the next fetch.
      this.browser = null;
      await browser.close();
    }
  }
}

export const contentFetcher = new ContentFetcher();

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/NosytLabs/presearch-search-api-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.