fetch-html
Extract HTML content from any URL or convert web pages to text-only format for streamlined processing and analysis with the MCP URL Fetcher server.
Instructions
Fetch content from any URL and convert to HTML format
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| extractText | No | Whether to extract text content only (default: false) | false |
| url | Yes | URL to fetch content from | |
Implementation Reference
- src/index.ts:201-240 (registration) Registration of the "fetch-html" MCP tool, specifying name, description, input schema with Zod, and the handler function. server.tool( "fetch-html", "Fetch content from any URL and convert to HTML format", { url: z.string().url().describe("URL to fetch content from"), extractText: z.boolean().optional().describe("Whether to extract text content only (default: false)"), }, async ({ url, extractText = false }) => { try { const response = await fetchUrl(url); const contentText = await response.text(); const detectedType = detectContentType(response, url); let htmlContent; if (extractText) { const plainText = await convertToText(contentText, detectedType, url); htmlContent = `<pre>${escapeHtml(plainText)}</pre>`; } else { htmlContent = await convertToHtml(contentText, detectedType, url); } // Record this fetch recordUrlFetch(url, "html"); return { content: [{ type: "text", text: htmlContent }], }; } catch (error) { return { isError: true, content: [ { type: "text", text: `Error converting to HTML: ${error instanceof Error ? error.message : String(error)}`, }, ], }; } } );
- src/index.ts:204-207 (schema) Input schema definition for the "fetch-html" tool using Zod validators for the URL and the optional extractText flag. { url: z.string().url().describe("URL to fetch content from"), extractText: z.boolean().optional().describe("Whether to extract text content only (default: false)"), },
- src/index.ts:208-239 (handler) Handler function that executes the "fetch-html" tool: fetches the URL, detects the content type, conditionally extracts text or converts to HTML using helpers, records the fetch, and returns the formatted content or an error. async ({ url, extractText = false }) => { try { const response = await fetchUrl(url); const contentText = await response.text(); const detectedType = detectContentType(response, url); let htmlContent; if (extractText) { const plainText = await convertToText(contentText, detectedType, url); htmlContent = `<pre>${escapeHtml(plainText)}</pre>`; } else { htmlContent = await convertToHtml(contentText, detectedType, url); } // Record this fetch recordUrlFetch(url, "html"); return { content: [{ type: "text", text: htmlContent }], }; } catch (error) { return { isError: true, content: [ { type: "text", text: `Error converting to HTML: ${error instanceof Error ? error.message : String(error)}`, }, ], }; } }
- src/index.ts:409-606 (helper)Core helper function convertToHtml that performs format-specific conversions to HTML, used by the fetch-html handler for non-extractText mode.async function convertToHtml(content: string, sourceType: string, sourceUrl: string): Promise<string> { try { switch (sourceType) { case "html": // Already HTML, just sanitize it return sanitizeHtml(content, { allowedTags: sanitizeHtml.defaults.allowedTags.concat(["img", "h1", "h2", "h3", "h4", "h5", "h6"]), allowedAttributes: { ...sanitizeHtml.defaults.allowedAttributes, img: ["src", "alt", "title", "width", "height"], a: ["href", "name", "target"], }, }); case "json": try { // Format JSON as HTML const jsonObj = JSON.parse(content); return `<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>JSON Viewer</title> <style> body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; line-height: 1.6; padding: 20px; } pre { background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto; } .json-key { color: #0033b3; } .json-string { color: #388E3C; } .json-number { color: #1976D2; } .json-boolean { color: #7E57C2; } .json-null { color: #5D4037; } </style> </head> <body> <h1>JSON Content</h1> <pre>${formatJsonForHtml(JSON.stringify(jsonObj, null, 2))}</pre> <footer> <p>Source: ${escapeHtml(sourceUrl)}</p> <p>Converted at: ${new Date().toLocaleString()}</p> </footer> </body> </html>`; } catch (e) { return `<pre>${escapeHtml(content)}</pre>`; } case "markdown": // Convert markdown to HTML const htmlContent = marked.parse(content); return `<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Markdown Content</title> <style> body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica 
Neue', sans-serif; line-height: 1.6; padding: 20px; max-width: 800px; margin: 0 auto; } img { max-width: 100%; height: auto; } pre { background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto; } code { background-color: #f5f5f5; padding: 2px 4px; border-radius: 3px; } blockquote { border-left: 4px solid #ddd; padding-left: 15px; color: #666; } table { border-collapse: collapse; width: 100%; } table, th, td { border: 1px solid #ddd; } th, td { padding: 8px; text-align: left; } th { background-color: #f5f5f5; } </style> </head> <body> ${htmlContent} <footer> <p>Source: ${escapeHtml(sourceUrl)}</p> <p>Converted at: ${new Date().toLocaleString()}</p> </footer> </body> </html>`; case "csv": // Convert CSV to HTML table const jsonData = await csvtojson().fromString(content); if (jsonData.length === 0) { throw new Error("CSV data appears to be empty or invalid"); } // Get headers from the first row const headers = Object.keys(jsonData[0]); // Generate HTML table let tableHtml = '<table border="1"><thead><tr>'; // Add header row headers.forEach((header) => { tableHtml += `<th>${escapeHtml(header)}</th>`; }); tableHtml += "</tr></thead><tbody>"; // Add data rows jsonData.forEach((row) => { tableHtml += "<tr>"; headers.forEach((header) => { tableHtml += `<td>${escapeHtml(String(row[header]))}</td>`; }); tableHtml += "</tr>"; }); tableHtml += "</tbody></table>"; return `<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>CSV Data</title> <style> body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; padding: 20px; } table { border-collapse: collapse; width: 100%; margin-bottom: 20px; } th, td { padding: 8px; text-align: left; border: 1px solid #ddd; } th { background-color: #f5f5f5; position: sticky; top: 0; } tr:nth-child(even) { background-color: #f9f9f9; } .container { max-height: 600px; 
overflow-y: auto; margin-top: 20px; } </style> </head> <body> <h1>CSV Data</h1> <div class="container"> ${tableHtml} </div> <footer> <p>Source: ${escapeHtml(sourceUrl)}</p> <p>Converted at: ${new Date().toLocaleString()}</p> <p>Total rows: ${jsonData.length}</p> </footer> </body> </html>`; case "xml": try { // Parse XML to JSON then generate an HTML representation const jsonObj = xmlParser.parse(content); return `<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XML Content</title> <style> body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; line-height: 1.6; padding: 20px; } pre { background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto; } .xml-tag { color: #0033b3; } .xml-attr { color: #7E57C2; } .xml-content { color: #388E3C; } </style> </head> <body> <h1>XML Content</h1> <h2>Original XML</h2> <pre>${escapeHtml(content)}</pre> <h2>As JSON</h2> <pre>${formatJsonForHtml(JSON.stringify(jsonObj, null, 2))}</pre> <footer> <p>Source: ${escapeHtml(sourceUrl)}</p> <p>Converted at: ${new Date().toLocaleString()}</p> </footer> </body> </html>`; } catch (xmlError) { return `<pre>${escapeHtml(content)}</pre>`; } default: // Wrap plain text in HTML return `<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Text Content</title> <style> body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; line-height: 1.6; padding: 20px; } pre { background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto; white-space: pre-wrap; } </style> </head> <body> <h1>Text Content</h1> <pre>${escapeHtml(content)}</pre> <footer> <p>Source: ${escapeHtml(sourceUrl)}</p> <p>Converted at: ${new Date().toLocaleString()}</p> 
</footer> </body> </html>`; } } catch (error) { throw new Error(`HTML conversion error: ${error instanceof Error ? error.message : String(error)}`); } }
- src/index.ts:40-51 (helper) Reusable fetchUrl helper function that retrieves content from a URL, throws on non-OK HTTP responses, and logs then rethrows any error; used by multiple fetch tools including fetch-html. async function fetchUrl(url: string) { try { const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } return response; } catch (error) { console.error(`Error fetching URL: ${url}`, error); throw error; } }