Skip to main content
Glama
scrape.ts1.95 kB
import { scrapePage } from "../lib/scraper.js"; import { extractContent } from "../lib/extractor.js"; import { getFromCache, saveToCache } from "../lib/cache.js"; import { JSDOM } from "jsdom"; interface ScrapeParams { url: string; selector?: string; javascript?: boolean; timeout?: number; } interface ScrapeResult { url: string; title: string; content: string; markdown: string; selectedContent?: string; fromCache: boolean; timestamp: string; } export async function scrape(params: ScrapeParams): Promise<ScrapeResult> { const { url, selector, javascript = false, timeout = 30000 } = params; const cached = getFromCache(url); if (cached && !selector) { return { url, title: cached.title, content: cached.content, markdown: cached.markdown, fromCache: true, timestamp: new Date(cached.cached_at).toISOString(), }; } try { const { html } = await scrapePage(url, { javascript, timeout }); const extracted = await extractContent(html, url); if (!extracted) { throw new Error("Failed to extract content"); } if (!cached) { saveToCache(url, { content: extracted.textContent, markdown: extracted.markdown, title: extracted.title, }); } let selectedContent: string | undefined; if (selector) { try { const dom = new JSDOM(html, { url }); const element = dom.window.document.querySelector(selector); selectedContent = element ? element.textContent || undefined : undefined; } catch (e) { console.error("Selector error:", e); } } return { url, title: extracted.title, content: extracted.textContent, markdown: extracted.markdown, selectedContent, fromCache: false, timestamp: new Date().toISOString(), }; } catch (error) { console.error("Scrape error:", error); throw error; } }

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/alucardeht/isis-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server