scrape-webpage.ts (2.73 kB)
import {
  CallToolResult,
  ServerRequest,
  ServerNotification,
} from "@modelcontextprotocol/sdk/types.js";
import { RequestHandlerExtra } from "@modelcontextprotocol/sdk/shared/protocol.js";
import { getClient, downloadImageAsBase64 } from "../utils";
import { scrapeWebpageToolParamSchemaType } from "./tool-types";

export async function scrapeWebpageTool(
  params: scrapeWebpageToolParamSchemaType,
  extra: RequestHandlerExtra<ServerRequest, ServerNotification>
): Promise<CallToolResult> {
  const { url, sessionOptions, outputFormat } = params;

  // Access authInfo from the extra parameter: for SSE connections,
  // the caller's token is used as the Hyperbrowser API key.
  let apiKey: string | undefined = undefined;
  if (extra.authInfo && extra.authInfo.extra?.isSSE) {
    apiKey = extra.authInfo.token;
  }

  try {
    // Create a Hyperbrowser client (with the per-request key if one was found)
    // and run the scrape, waiting for it to finish.
    const client = await getClient({ hbApiKey: apiKey });
    const result = await client.scrape.startAndWait({
      url,
      sessionOptions,
      scrapeOptions: {
        formats: outputFormat,
      },
    });

    // Surface scrape-level errors as an error result.
    if (result.error) {
      return {
        isError: true,
        content: [
          {
            type: "text",
            text: result.error,
          },
        ],
      };
    }

    const response: CallToolResult = {
      content: [],
      isError: false,
    };

    // Markdown and HTML output are returned as text content blocks.
    if (result.data?.markdown) {
      response.content.push({
        type: "text",
        text: result.data.markdown,
      });
    }

    if (result.data?.html) {
      response.content.push({
        type: "text",
        text: result.data.html,
      });
    }

    // Each extracted link becomes a resource entry.
    if (result.data?.links) {
      result.data.links.forEach((link) => {
        response.content.push({
          type: "resource",
          resource: {
            uri: link,
            text: link,
          },
        });
      });
    }

    // Screenshots are downloaded and embedded as base64 image content.
    if (result.data?.screenshot) {
      const imageData = await downloadImageAsBase64(result.data.screenshot);
      if (!imageData) {
        response.content.push({
          type: "text",
          text: "Failed to get screenshot",
        });
        response.isError = true;
      } else {
        response.content.push({
          type: "image",
          data: imageData.data,
          mimeType: imageData.mimeType,
        });
      }
    }

    return response;
  } catch (error) {
    return {
      content: [{ type: "text", text: `${error}` }],
      isError: true,
    };
  }
}

export const scrapeWebpageToolName = "scrape_webpage";
export const scrapeWebpageToolDescription =
  "Scrape a webpage and extract its content in various formats. This tool allows fetching content from a single URL with configurable browser behavior options. Use this for extracting text content, HTML structure, collecting links, or capturing screenshots of webpages.";
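For context, here is a minimal sketch of how a tool like this is typically registered on an MCP server with the TypeScript SDK's McpServer.tool() API. The server name and version, the file paths, and the zod parameter shape below are assumptions for illustration; the real parameter schema backing scrapeWebpageToolParamSchemaType lives in tool-types.ts and is not shown on this page.

// sketch: server.ts (hypothetical entry point, not part of this repo's shown code)
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import {
  scrapeWebpageTool,
  scrapeWebpageToolName,
  scrapeWebpageToolDescription,
} from "./tools/scrape-webpage";

// Approximation of the parameter shape; the real schema is defined in tools/tool-types.ts.
const scrapeWebpageParams = {
  url: z.string().url().describe("URL of the webpage to scrape"),
  outputFormat: z
    .array(z.enum(["markdown", "html", "links", "screenshot"]))
    .describe("Formats to return from the scrape"),
  sessionOptions: z
    .object({ useProxy: z.boolean().optional() })
    .optional()
    .describe("Optional browser session configuration"),
};

async function main() {
  const server = new McpServer({ name: "hyperbrowser-mcp", version: "1.0.0" });

  server.tool(
    scrapeWebpageToolName,
    scrapeWebpageToolDescription,
    // In the real server the zod schema from tools/tool-types.ts would be passed
    // here so the inferred argument type matches scrapeWebpageToolParamSchemaType.
    scrapeWebpageParams,
    scrapeWebpageTool
  );

  // Expose the server over stdio so MCP clients can connect to it.
  await server.connect(new StdioServerTransport());
}

main().catch(console.error);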

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BACH-AI-Tools/bach-hyperbrowser-mcp'
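
The same endpoint can also be queried from code. A minimal TypeScript sketch, assuming the endpoint returns JSON and requires no authentication:

const res = await fetch(
  "https://glama.ai/api/mcp/v1/servers/BACH-AI-Tools/bach-hyperbrowser-mcp"
);
if (!res.ok) {
  throw new Error(`Request failed: ${res.status}`);
}
const serverInfo = await res.json();
console.log(serverInfo);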

If you have feedback or need assistance with the MCP directory API, please join our Discord server.