web-fetch
Fetch and extract content from URLs, supporting HTML text extraction, JSON, and plain text formats with configurable domain security controls.
Instructions
Fetch content from a URL. Supports HTML (extracts text), JSON, and plain text. By default, only allows trusted domains for security. Set allow_any_domain=true to fetch from any URL (use with caution).
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | The URL to fetch content from | |
| allow_any_domain | No | Allow fetching from any domain (default: false, only trusted domains) | false |
| include_headers | No | Include response headers in the output (default: false) | false |
Implementation Reference
- src/tools/web-fetch.ts:230-255 (handler) — The handler function that executes the web-fetch tool logic: it fetches the URL using the `fetchUrl` helper, formats the result, and returns either the text content or an error.

  ```ts
  async ({ url, allow_any_domain = false, include_headers = false }) => {
    try {
      const result = await fetchUrl(url, allow_any_domain, include_headers);
      const formattedResult = formatFetchResult(result);
      return {
        content: [
          {
            type: "text" as const,
            text: formattedResult,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: "text" as const,
            text: `Error fetching URL: ${error instanceof Error ? error.message : "Unknown error"}`,
          },
        ],
        isError: true,
      };
    }
  },
  );
  ```
- src/tools/web-fetch.ts:216-228 (schema) — Input schema for the web-fetch tool, defined with Zod: `url` is required; `allow_any_domain` and `include_headers` are optional booleans.

  ```ts
  inputSchema: {
    url: z.string().url().describe("The URL to fetch content from"),
    allow_any_domain: z
      .boolean()
      .optional()
      .describe(
        "Allow fetching from any domain (default: false, only trusted domains)",
      ),
    include_headers: z
      .boolean()
      .optional()
      .describe("Include response headers in the output (default: false)"),
  },
  ```
- src/tools/web-fetch.ts:209-256 (registration) — The `registerWebFetchTool` function, which registers the 'web-fetch' tool on the MCP server, including its name, description, input schema, and handler.

  ```ts
  export function registerWebFetchTool(server: McpServer) {
    server.registerTool(
      "web-fetch",
      {
        title: "Web Fetch",
        description:
          "Fetch content from a URL. Supports HTML (extracts text), JSON, and plain text. By default, only allows trusted domains for security. Set allow_any_domain=true to fetch from any URL (use with caution).",
        inputSchema: {
          url: z.string().url().describe("The URL to fetch content from"),
          allow_any_domain: z
            .boolean()
            .optional()
            .describe(
              "Allow fetching from any domain (default: false, only trusted domains)",
            ),
          include_headers: z
            .boolean()
            .optional()
            .describe("Include response headers in the output (default: false)"),
        },
      },
      async ({ url, allow_any_domain = false, include_headers = false }) => {
        try {
          const result = await fetchUrl(url, allow_any_domain, include_headers);
          const formattedResult = formatFetchResult(result);
          return {
            content: [
              {
                type: "text" as const,
                text: formattedResult,
              },
            ],
          };
        } catch (error) {
          return {
            content: [
              {
                type: "text" as const,
                text: `Error fetching URL: ${error instanceof Error ? error.message : "Unknown error"}`,
              },
            ],
            isError: true,
          };
        }
      },
    );
  }
  ```
- src/tools/index.ts:35-36 (registration) — The call to `registerWebFetchTool` made during tool registration in index.ts, followed by a log entry.

  ```ts
  registerWebFetchTool(server);
  logger.tool("web-fetch", "registered");
  ```
- src/tools/web-fetch.ts:103-184 (helper) — The core helper function `fetchUrl`, which validates the URL, performs the actual HTTP fetch, processes the body according to its content type (JSON pretty-printing, HTML text extraction, text truncation), and returns a structured `FetchResult`.

  ```ts
  async function fetchUrl(
    url: string,
    allowAnyDomain = false,
    includeHeaders = false,
  ): Promise<FetchResult> {
    // Validate URL
    if (!isUrlAllowed(url, allowAnyDomain)) {
      throw new Error(
        allowAnyDomain
          ? "Invalid URL protocol (only http/https allowed)"
          : `URL not allowed. Allowed domains: ${ALLOWED_DOMAINS.join(", ")}, and Duyet's GitHub repositories (github.com/duyet/*, raw.githubusercontent.com/duyet/*, gist.github.com/duyet/*)`,
      );
    }
    try {
      const response = await fetch(url, {
        headers: {
          "User-Agent":
            "Mozilla/5.0 (compatible; DuyetMCP/0.1; +https://duyet.net/)",
        },
        redirect: "follow",
      });
      const contentType = response.headers.get("content-type") || "text/plain";
      const status = response.status;
      // Check content length to prevent memory issues
      const contentLength = response.headers.get("content-length");
      if (contentLength) {
        const size = Number.parseInt(contentLength, 10);
        if (size > MAX_CONTENT_LENGTH) {
          throw new Error(
            `Content too large: ${(size / 1024 / 1024).toFixed(2)}MB (max ${MAX_CONTENT_LENGTH / 1024 / 1024}MB)`,
          );
        }
      }
      // Get response headers if requested
      const headers: Record<string, string> = {};
      if (includeHeaders) {
        response.headers.forEach((value, key) => {
          headers[key] = value;
        });
      }
      let content: string;
      // Process based on content type
      if (contentType.includes("application/json")) {
        const json = await response.json();
        content = JSON.stringify(json, null, 2);
      } else if (contentType.includes("text/html")) {
        const html = await response.text();
        // For HTML, extract readable text content
        content = extractTextFromHtml(html);
        // Limit content length for large pages
        if (content.length > 10000) {
          content = `${content.substring(0, 10000)}\n\n[Content truncated...]`;
        }
      } else {
        // Plain text or other types
        content = await response.text();
        // Limit content length
        if (content.length > 50000) {
          content = `${content.substring(0, 50000)}\n\n[Content truncated...]`;
        }
      }
      return {
        url,
        status,
        contentType,
        content,
        headers: includeHeaders ? headers : undefined,
      };
    } catch (error) {
      throw new Error(
        `Failed to fetch URL: ${error instanceof Error ? error.message : "Unknown error"}`,
      );
    }
  }
  ```