pilot_page_text
Extract clean, readable text from the page by stripping `script`, `style`, `noscript`, and `svg` elements and dropping blank lines. Use this to get concise page content for analysis or processing.
Instructions
Extract clean text from the page (strips script/style/noscript/svg).
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| max_chars | No | Max characters to return (default: 20000). Prevents token bloat on large pages. | 20000 |
Implementation Reference
- src/tools/page.ts:30-46 (handler) Registration of the 'pilot_page_text' tool via server.tool(), including the handler function that extracts clean text from the page using getCleanText() and truncates it via truncate().
export function registerPageTools(server: McpServer, bm: BrowserManager) { server.tool( 'pilot_page_text', 'Extract clean text from the page (strips script/style/noscript/svg).', { max_chars: z.number().optional().describe('Max characters to return (default: 20000). Prevents token bloat on large pages.'), }, async ({ max_chars }) => { await bm.ensureBrowser(); try { const text = await getCleanText(bm.getPage()); return { content: [{ type: 'text' as const, text: truncate(text, max_chars) }] }; } catch (err) { return { content: [{ type: 'text' as const, text: wrapError(err) }], isError: true }; } } ); - src/tools/page.ts:6-18 (helper)Helper function getCleanText() that clones the document body, strips script/style/noscript/svg elements, and returns cleaned innerText.
/**
 * Returns the text of the page body with script/style/noscript/svg content
 * removed, every line trimmed, and empty lines dropped.
 *
 * The work is done inside a callback handed to `page.evaluate`, so the callback
 * must stay self-contained (no references to names outside its own body).
 *
 * @param page Playwright page whose body text is extracted.
 * @returns Newline-joined non-empty lines, or `''` when the document has no body.
 */
async function getCleanText(page: import('playwright').Page): Promise<string> {
  const cleaned = await page.evaluate(() => {
    const root = document.body;
    if (!root) {
      return '';
    }

    // Operate on a deep copy so the elements are removed from the copy only,
    // never from the document itself.
    const copy = root.cloneNode(true) as HTMLElement;
    const unwanted = copy.querySelectorAll('script, style, noscript, svg');
    for (let i = 0; i < unwanted.length; i++) {
      unwanted[i].remove();
    }

    // Trim each line and keep only the ones that still have content.
    const rawLines = copy.innerText.split('\n');
    const kept: string[] = [];
    for (let i = 0; i < rawLines.length; i++) {
      const line = rawLines[i].trim();
      if (line.length > 0) {
        kept.push(line);
      }
    }
    return kept.join('\n');
  });
  return cleaned;
}
// - src/tools/page.ts:22-28 (helper) Helper function truncate() that truncates text to max_chars (default 20000) with a message about remaining chars.
/**
 * Caps `text` at a maximum number of UTF-16 code units and, when something was
 * cut, appends a notice stating how many were left out.
 *
 * @param text Text to cap.
 * @param maxChars Requested cap. Fractions are rounded down. A missing value,
 *   `0`, `NaN`, or anything below 1 falls back to `DEFAULT_MAX_CHARS`; a
 *   negative value would otherwise reach `slice` as a from-the-end index and
 *   produce a wrong cut and a wrong remainder count. `Infinity` means no cap.
 * @returns `text` unchanged when it fits, otherwise the leading part of `text`
 *   followed by the truncation notice.
 */
function truncate(text: string, maxChars?: number): string {
  const limit =
    typeof maxChars === 'number' && maxChars >= 1 ? Math.floor(maxChars) : DEFAULT_MAX_CHARS;
  if (text.length <= limit) return text;

  // Do not end the kept part on a high surrogate: that would split a
  // surrogate pair and leave a lone, unpaired code unit in the output.
  let cut = limit;
  const lastUnit = text.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

  const remaining = text.length - cut;
  return (
    text.slice(0, cut) +
    `\n\n── truncated: ${remaining} chars not shown (use max_chars to increase) ──`
  );
}
// - src/tools/page.ts:35-35 (schema) Zod schema for the max_chars optional input parameter (default 20000).
max_chars: z.number().optional().describe('Max characters to return (default: 20000). Prevents token bloat on large pages.'), - src/tools/register.ts:60-80 (registration)Tool registration via registerAllTools() calling registerPageTools(). The tool is included in the 'standard' profile set (line 45).
/**
 * Builds a server object whose `tool` method only registers allowed tools.
 *
 * The returned object is created with `Object.create(server)`, so everything
 * other than `tool` is inherited from the original server via the prototype.
 *
 * @param server Server whose `tool` method performs the real registration.
 * @param allowed Names of the tools that may be registered.
 * @returns An object that forwards `tool` calls for allowed names and ignores the rest.
 */
function createFilteredServer(server: McpServer, allowed: Set<string>): McpServer {
  // Bind now so forwarded calls always run against the original server.
  const originalTool = server.tool.bind(server);
  const filtered = Object.create(server) as McpServer;
  filtered.tool = ((...args: unknown[]) => {
    // The first argument of a `tool` call is read as the tool name.
    const name = args[0] as string;
    // Names outside the allowed set are dropped: nothing is registered and undefined is returned.
    if (!allowed.has(name)) return;
    return (originalTool as Function).apply(server, args);
  }) as typeof server.tool;
  return filtered;
}

/**
 * Registers the tool groups on `server`, going through a filtered server when
 * `PROFILE_TOOLS` has an entry for `profile`, and through `server` directly otherwise.
 *
 * NOTE(review): the rest of this function lies outside the visible excerpt.
 */
export function registerAllTools(server: McpServer, bm: BrowserManager, profile: ToolProfile = 'full'): void {
  const allowed = PROFILE_TOOLS[profile];
  const effectiveServer = allowed ? createFilteredServer(server, allowed) : server;
  registerNavigationTools(effectiveServer, bm);
  registerSnapshotTools(effectiveServer, bm);
  registerInteractionTools(effectiveServer, bm);
  registerPageTools(effectiveServer, bm);