fetch_url
Extract clean content from URLs by removing ads, navigation, and scripts, then convert to token-efficient Markdown for AI processing. Supports CSS selectors, multiple output formats, and smart caching.
Instructions
Convert any URL to clean, token-efficient Markdown. Strips ads, navigation, scripts, and noise. Returns clean content ready for LLM consumption.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | The URL to fetch and convert to Markdown | |
| format | No | Output format: markdown (default), json, text, html | |
| selector | No | CSS selector to extract specific elements (e.g. 'article', '.content', '#main') | |
| model | No | AI model ID for cost tracking (e.g. 'claude-sonnet-4-6', 'gpt-5') | |
| cache | No | Set to false to bypass cache and force fresh fetch | |
| ttl | No | Cache TTL in seconds (default 3600, max 86400) | |
| max_tokens | No | Truncate output to fit within this token budget | |
Implementation Reference
- src/index.ts:143-159 (handler) Handler for the fetch_url tool.
async (params) => { const result = await fetchUrl(params); const meta = [ `Tokens: ${result.tokens.toLocaleString()} (saved ${result.savingsPercent}% from ${result.originalTokens.toLocaleString()})`, `Cache: ${result.cached}`, `Fetch: ${result.fetchMs}ms`, ]; if (result.title) meta.unshift(`Title: ${result.title}`); if (result.truncated) meta.push("Truncated: yes"); return { content: [ { type: "text" as const, text: `${meta.join(" | ")}\n\n---\n\n${result.content}` }, ], }; } - src/index.ts:107-142 (registration)Registration and schema definition for fetch_url.
server.tool( "fetch_url", "Convert any URL to clean, token-efficient Markdown. Strips ads, navigation, scripts, and noise. Returns clean content ready for LLM consumption.", { url: z.string().url().describe("The URL to fetch and convert to Markdown"), format: z .enum(["markdown", "json", "text", "html"]) .optional() .describe("Output format: markdown (default), json, text, html"), selector: z .string() .optional() .describe( "CSS selector to extract specific elements (e.g. 'article', '.content', '#main')" ), model: z .string() .optional() .describe( "AI model ID for cost tracking (e.g. 'claude-sonnet-4-6', 'gpt-5')" ), cache: z .boolean() .optional() .describe("Set to false to bypass cache and force fresh fetch"), ttl: z .number() .optional() .describe("Cache TTL in seconds (default 3600, max 86400)"), max_tokens: z .number() .int() .positive() .optional() .describe("Truncate output to fit within this token budget"), }, - src/index.ts:30-100 (helper)Helper function that performs the API call for fetch_url.
/**
 * Requests `${BASE_URL}/fetch` for `params.url` and returns the response body
 * together with the metadata reported in the `X-StripFeed-*` response headers.
 *
 * Optional parameters (`format`, `selector`, `model`, `ttl`, `max_tokens`) are
 * forwarded as query-string values only when set; `cache` is forwarded only
 * when it is explicitly `false`.
 *
 * @param params - Request options; `url` is always sent.
 * @returns The content, token counts (0 when the header is missing or not a
 *   number), the raw savings/cache/fetch-time header values, the truncation
 *   flag, and the title when a JSON response provides one.
 * @throws Error with message `StripFeed API error <status>: <message>` when the
 *   response status is not OK. `<message>` is the `error` string of a JSON
 *   body when present, otherwise the raw response body.
 */
async function fetchUrl(params: FetchParams): Promise<{
  content: string;
  tokens: number;
  originalTokens: number;
  savingsPercent: string;
  cached: string;
  fetchMs: string;
  truncated: boolean;
  title?: string;
}> {
  const apiKey = getApiKey();

  const searchParams = new URLSearchParams({ url: params.url });
  if (params.format) searchParams.set("format", params.format);
  if (params.selector) searchParams.set("selector", params.selector);
  if (params.model) searchParams.set("model", params.model);
  if (params.cache === false) searchParams.set("cache", "false");
  if (params.ttl !== undefined) searchParams.set("ttl", String(params.ttl));
  if (params.max_tokens !== undefined)
    searchParams.set("max_tokens", String(params.max_tokens));

  const response = await fetch(`${BASE_URL}/fetch?${searchParams}`, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });

  if (!response.ok) {
    const body = await response.text();
    // Default to the raw body so the message is never "undefined" or
    // "[object Object]" when the JSON payload has no string `error` field.
    let message = body;
    try {
      const parsed: unknown = JSON.parse(body);
      if (typeof parsed === "object" && parsed !== null) {
        const candidate = (parsed as { error?: unknown }).error;
        if (typeof candidate === "string" && candidate.length > 0) {
          message = candidate;
        }
      }
    } catch {
      // Body is not JSON; keep the raw text.
    }
    throw new Error(`StripFeed API error ${response.status}: ${message}`);
  }

  // Parse a numeric header in base 10; a missing or malformed value becomes 0
  // instead of NaN so callers can format it safely.
  const headerInt = (name: string): number => {
    const parsed = Number.parseInt(response.headers.get(name) ?? "0", 10);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const meta = {
    tokens: headerInt("X-StripFeed-Tokens"),
    originalTokens: headerInt("X-StripFeed-Original-Tokens"),
    savingsPercent: response.headers.get("X-StripFeed-Savings-Percent") ?? "0",
    cached: response.headers.get("X-StripFeed-Cache") ?? "MISS",
    fetchMs: response.headers.get("X-StripFeed-Fetch-Ms") ?? "0",
    truncated: response.headers.get("X-StripFeed-Truncated") === "true",
  };

  const contentType = response.headers.get("content-type") ?? "";
  if (contentType.includes("application/json")) {
    const json: unknown = await response.json();
    const fields: Record<string, unknown> =
      typeof json === "object" && json !== null
        ? (json as Record<string, unknown>)
        : {};
    return {
      // Honour the declared `string` type: if the payload carries no
      // `markdown` string, return the serialized payload rather than undefined.
      content:
        typeof fields.markdown === "string"
          ? fields.markdown
          : JSON.stringify(json),
      ...meta,
      title: typeof fields.title === "string" ? fields.title : undefined,
    };
  }

  const content = await response.text();
  return { content, ...meta };
}