web_read
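Read web page content for a URL. Returns parsed text from HTML documents to supply information to language models. Note that the handler parses the HTML passed in `html` (for example, the output of web.fetch) and uses `url` as the document's address; it does not download the page itself.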
Instructions
Alias of web.read
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
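| url | Yes | URL of the page (string); used as the address of the parsed document | |
| html | No | HTML markup to parse (string), e.g. the output of web.fetch | |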
Implementation Reference
- src/tools/webRead.ts:4-30 (handler) The main handler function for web_read. Uses JSDOM and Readability to parse HTML and extract readable content (title, byline, lang, text, word count, links, meta).
/** Structured result returned by {@link webRead}. */
export interface WebReadResult {
  /** Article title reported by Readability ('' when unavailable). */
  title: string;
  /** Article byline reported by Readability ('' when unavailable). */
  byline: string;
  /** Lower-cased `lang` attribute of the root element ('' when absent). */
  lang: string;
  /** Plain-text content of the extracted article. */
  text: string;
  /** Number of whitespace-separated tokens in `text`. */
  wordCount: number;
  /** Every `<a href>` in the page, with its trimmed text and its `href` value. */
  links: Array<{ text: string; url: string }>;
  /** `<meta name|property>` -> `content` pairs; later duplicates overwrite earlier ones. */
  meta: Record<string, string>;
}

/**
 * Parses an HTML string and extracts its readable content.
 *
 * The markup is loaded into a JSDOM document whose address is `args.url`, and
 * Readability is run over it. This function only works on the `html` it is
 * given; when `html` is omitted an empty document is parsed.
 *
 * @param args.url  Address assigned to the parsed document (passed to JSDOM as `url`).
 * @param args.html HTML markup to parse; defaults to an empty string.
 * @returns The extracted article fields plus the page's links and meta tags.
 *          When Readability cannot produce an article, every field is empty
 *          (`wordCount` is 0, `links` is `[]`, `meta` is `{}`).
 */
export function webRead(args: { url: string, html?: string }): WebReadResult {
  const { url, html } = args;
  const doc = new JSDOM(html ?? '', { url });
  const document = doc.window.document;

  // Readability.parse() rewrites the document it is handed (article nodes are
  // moved out of the tree). Parse a deep clone so the link/meta/lang queries
  // below still see the page as it was delivered.
  const art = new Readability(document.cloneNode(true) as Document).parse();
  if (!art) {
    return { title: '', byline: '', lang: '', text: '', wordCount: 0, links: [], meta: {} };
  }

  const links: Array<{ text: string; url: string }> = [];
  document.querySelectorAll<HTMLAnchorElement>('a[href]').forEach((anchor) => {
    const href = anchor.href;
    const text = anchor.textContent?.trim() ?? '';
    // Anchors whose href is empty are skipped.
    if (href) links.push({ text, url: href });
  });

  const meta: Record<string, string> = {};
  document.querySelectorAll('meta[name], meta[property]').forEach((el) => {
    // `name` takes precedence over `property` when both are present.
    const key = el.getAttribute('name') || el.getAttribute('property');
    const val = el.getAttribute('content');
    if (key && val) meta[key] = val;
  });

  const text = art.textContent ?? '';
  return {
    title: art.title ?? '',
    byline: art.byline ?? '',
    lang: (document.documentElement.getAttribute('lang') ?? '').toLowerCase(),
    text,
    wordCount: text.split(/\s+/).filter(Boolean).length,
    links,
    meta,
  };
}
// - src/server.ts:87-87 (schema) Input schema for web.read/web_read tool: url (string, required) and html (string, optional).
/**
 * Zod raw shape for the arguments of the web.read / web_read tools.
 * Field descriptions are attached so the published input schema documents
 * each argument instead of leaving the description empty.
 */
const webReadShape = {
  url: z
    .string()
    .describe('URL of the page; used as the address of the parsed document.'),
  html: z
    .string()
    .optional()
    .describe('HTML markup to extract readable content from (e.g. the output of web.fetch). An empty document is parsed when omitted.'),
};
// - src/server.ts:88-101 (registration) Registration of the 'web.read' tool and its alias 'web_read' with the MCP server, both calling the webRead handler.
// Register the primary tool and its underscore alias. Both share the same
// input shape and handler; only the name and description differ.
// NOTE(review): OPEN is defined outside this snippet — presumably the tool's
// annotations/access setting; confirm against its declaration.
const webReadRegistrations = [
  ['web.read', 'Extract readable content from given HTML (or pass html from web.fetch).'],
  ['web_read', 'Alias of web.read'],
] as const;

for (const [toolName, toolDescription] of webReadRegistrations) {
  server.tool(toolName, toolDescription, webReadShape, OPEN, async ({ url, html }) => {
    // The extraction result is serialised to JSON and returned as a single text item.
    const extracted = webRead({ url, html });
    const payload = JSON.stringify(extracted);
    return { content: [{ type: 'text', text: payload }] };
  });
}