Skip to main content
Glama
crawl-webpages.ts2.51 kB
import { CallToolResult, ServerRequest, ServerNotification, } from "@modelcontextprotocol/sdk/types.js"; import { RequestHandlerExtra } from "@modelcontextprotocol/sdk/shared/protocol.js"; import { getClient } from "../utils"; import { crawlWebpagesToolParamSchemaType } from "./tool-types"; export async function crawlWebpagesTool( params: crawlWebpagesToolParamSchemaType, extra: RequestHandlerExtra<ServerRequest, ServerNotification> ): Promise<CallToolResult> { const { url, sessionOptions, outputFormat, ignoreSitemap, followLinks, maxPages, } = params; let apiKey: string | undefined = undefined; if (extra.authInfo && extra.authInfo.extra?.isSSE) { apiKey = extra.authInfo.token; } try { const client = await getClient({ hbApiKey: apiKey }); const result = await client.crawl.startAndWait({ url, sessionOptions, scrapeOptions: { formats: outputFormat, }, maxPages, ignoreSitemap, followLinks, }); if (result.error) { return { isError: true, content: [ { type: "text", text: result.error, }, ], }; } const response: CallToolResult = { content: [], isError: false, }; result.data?.forEach((page) => { if (page?.markdown) { response.content.push({ type: "text", text: page.markdown, }); } if (page?.html) { response.content.push({ type: "text", text: page.html, }); } if (page?.links) { page.links.forEach((link) => { response.content.push({ type: "resource", resource: { uri: link, text: link, }, }); }); } if (page?.screenshot) { response.content.push({ type: "image", data: page.screenshot, mimeType: "image/webp", }); } }); return response; } catch (error) { return { content: [{ type: "text", text: `${error}` }], isError: true, }; } } export const crawlWebpagesToolName = "crawl_webpages"; export const crawlWebpagesToolDescription = "Crawl a website starting from a URL and explore linked pages. This tool allows systematic collection of content from multiple pages within a domain. Use this for larger data collection tasks, content indexing, or site mapping.";

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BACH-AI-Tools/bach-hyperbrowser-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server