fetch_url

Extract clean content from URLs by removing ads, navigation, and scripts, then convert to token-efficient Markdown for AI processing. Supports CSS selectors, multiple output formats, and smart caching.

Instructions

Convert any URL to clean, token-efficient Markdown. Strips ads, navigation, scripts, and noise. Returns clean content ready for LLM consumption.

Input Schema

Table | JSON Schema

Name	Required	Description
`url`	Yes	The URL to fetch and convert to Markdown
`format`	No	Output format: markdown (default), json, text, html
`selector`	No	CSS selector to extract specific elements (e.g. 'article', '.content', '#main')
`model`	No	AI model ID for cost tracking (e.g. 'claude-sonnet-4-6', 'gpt-5')
`cache`	No	Set to false to bypass cache and force fresh fetch
`ttl`	No	Cache TTL in seconds (default 3600, max 86400)
`max_tokens`	No	Truncate output to fit within this token budget

Implementation Reference

src/index.ts:143-159 (handler)

Handler for the fetch_url tool.

/**
 * Tool handler for fetch_url: delegates to fetchUrl and returns a single text
 * item made of a " | "-separated metadata line, a "---" divider, and the
 * fetched content.
 */
async (params) => {
  const result = await fetchUrl(params);

  const tokenCount = result.tokens.toLocaleString();
  const originalCount = result.originalTokens.toLocaleString();

  // Order matters: optional title first, fixed stats next, truncation flag last.
  const headerParts: string[] = [
    ...(result.title ? [`Title: ${result.title}`] : []),
    `Tokens: ${tokenCount} (saved ${result.savingsPercent}% from ${originalCount})`,
    `Cache: ${result.cached}`,
    `Fetch: ${result.fetchMs}ms`,
    ...(result.truncated ? ["Truncated: yes"] : []),
  ];

  const header = headerParts.join(" | ");
  const text = `${header}\n\n---\n\n${result.content}`;

  return { content: [{ type: "text" as const, text }] };
}

src/index.ts:107-142 (registration)

Registration and schema definition for fetch_url.

// Registers the "fetch_url" tool on `server`: tool name, human-readable
// description, then the input schema built with `z`. The excerpt ends after
// the schema object; the remaining arguments of this call are not shown here.
server.tool(
  "fetch_url",
  "Convert any URL to clean, token-efficient Markdown. Strips ads, navigation, scripts, and noise. Returns clean content ready for LLM consumption.",
  {
    // Required; must be a string that passes the schema's URL check.
    url: z.string().url().describe("The URL to fetch and convert to Markdown"),
    // Optional; restricted to the four listed output formats.
    format: z
      .enum(["markdown", "json", "text", "html"])
      .optional()
      .describe("Output format: markdown (default), json, text, html"),
    // Optional free-form string; the selector syntax is not validated here.
    selector: z
      .string()
      .optional()
      .describe(
        "CSS selector to extract specific elements (e.g. 'article', '.content', '#main')"
      ),
    // Optional free-form string; no list of accepted model IDs is enforced here.
    model: z
      .string()
      .optional()
      .describe(
        "AI model ID for cost tracking (e.g. 'claude-sonnet-4-6', 'gpt-5')"
      ),
    // Optional boolean.
    cache: z
      .boolean()
      .optional()
      .describe("Set to false to bypass cache and force fresh fetch"),
    // NOTE(review): the description states a default of 3600 and a max of
    // 86400, but this schema accepts any number (no int/positive/max checks).
    // Presumably the bounds are applied by the remote API - confirm.
    ttl: z
      .number()
      .optional()
      .describe("Cache TTL in seconds (default 3600, max 86400)"),
    // Optional; must be a positive integer when provided.
    max_tokens: z
      .number()
      .int()
      .positive()
      .optional()
      .describe("Truncate output to fit within this token budget"),
  },

src/index.ts:30-100 (helper)

Helper function that performs the API call for fetch_url.

/**
 * Calls the StripFeed `/fetch` endpoint for `params.url` and returns the
 * response content together with the metadata carried in the
 * `X-StripFeed-*` response headers.
 *
 * Optional parameters are forwarded as query-string values only when set;
 * `cache` is forwarded only when it is explicitly `false`.
 *
 * @param params - Tool arguments (url plus optional format, selector, model,
 *   cache, ttl and max_tokens).
 * @returns The content and header-derived metadata. `title` is present only
 *   when a JSON response carries a string `title`.
 * @throws Error with message `StripFeed API error <status>: <message>` when
 *   the response status is not OK. `<message>` is the JSON body's string
 *   `error` field when available, otherwise the raw response body.
 */
async function fetchUrl(params: FetchParams): Promise<{
  content: string;
  tokens: number;
  originalTokens: number;
  savingsPercent: string;
  cached: string;
  fetchMs: string;
  truncated: boolean;
  title?: string;
}> {
  const apiKey = getApiKey();
  const searchParams = new URLSearchParams({ url: params.url });

  if (params.format) searchParams.set("format", params.format);
  if (params.selector) searchParams.set("selector", params.selector);
  if (params.model) searchParams.set("model", params.model);
  if (params.cache === false) searchParams.set("cache", "false");
  if (params.ttl !== undefined) searchParams.set("ttl", String(params.ttl));
  if (params.max_tokens !== undefined) searchParams.set("max_tokens", String(params.max_tokens));

  const response = await fetch(`${BASE_URL}/fetch?${searchParams}`, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });

  if (!response.ok) {
    const body = await response.text();
    // Default to the raw body; only prefer the JSON `error` field when it is a
    // non-empty string, so the message never degrades to "undefined" or
    // "[object Object]".
    let message = body;
    try {
      const parsed: unknown = JSON.parse(body);
      if (typeof parsed === "object" && parsed !== null) {
        const { error } = parsed as { error?: unknown };
        if (typeof error === "string" && error.length > 0) message = error;
      }
    } catch {
      // Body is not JSON; keep the raw text.
    }
    throw new Error(`StripFeed API error ${response.status}: ${message}`);
  }

  // Missing or malformed numeric headers fall back to 0 instead of NaN.
  const countHeader = (name: string): number => {
    const parsed = Number.parseInt(response.headers.get(name) ?? "", 10);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const metadata = {
    tokens: countHeader("X-StripFeed-Tokens"),
    originalTokens: countHeader("X-StripFeed-Original-Tokens"),
    savingsPercent: response.headers.get("X-StripFeed-Savings-Percent") ?? "0",
    cached: response.headers.get("X-StripFeed-Cache") ?? "MISS",
    fetchMs: response.headers.get("X-StripFeed-Fetch-Ms") ?? "0",
    truncated: response.headers.get("X-StripFeed-Truncated") === "true",
  };

  const contentType = response.headers.get("content-type") ?? "";

  if (contentType.includes("application/json")) {
    const json: unknown = await response.json();
    const fields: { markdown?: unknown; title?: unknown } =
      typeof json === "object" && json !== null ? json : {};
    const { markdown, title } = fields;

    return {
      // If the payload has no string `markdown` field, return the serialized
      // payload rather than `undefined`, keeping `content` a string.
      content: typeof markdown === "string" ? markdown : JSON.stringify(json),
      ...metadata,
      ...(typeof title === "string" ? { title } : {}),
    };
  }

  const content = await response.text();
  return { content, ...metadata };
}

stripfeed-mcp-server

fetch_url

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API