superFetch MCP Server

fetch-url-tool.test.ts•22.4 KiB

import assert from 'node:assert/strict';
import { after, describe, it } from 'node:test';

import { NodeHtmlMarkdown } from 'node-html-markdown';

import * as cache from '../dist/lib/cache.js';
import { config } from '../dist/lib/config.js';
import { normalizeUrl } from '../dist/lib/fetch.js';
import { cleanupMarkdownArtifacts } from '../dist/lib/markdown-cleanup.js';
import { fetchUrlToolHandler } from '../dist/tools/fetch-url.js';
import {
  htmlToMarkdown,
  shutdownTransformWorkerPool,
} from '../dist/transform/transform.js';

// Shut the transform worker pool down once every suite in this file is done.
after(async () => {
  await shutdownTransformWorkerPool();
});

type FetchUrlResponse = Awaited<ReturnType<typeof fetchUrlToolHandler>>;

/**
 * Asserts that the first `text` content block of a tool response is a JSON
 * object that deep-equals the response's `structuredContent`.
 *
 * @param response - Result returned by `fetchUrlToolHandler`.
 */
function assertTextBlockMatchesStructured(response: FetchUrlResponse): void {
  const textBlock = response.content.find((block) => block.type === 'text');
  assert.ok(textBlock && textBlock.type === 'text');

  const parsed = JSON.parse(textBlock.text) as unknown;
  assert.equal(typeof parsed, 'object');
  assert.ok(parsed && !Array.isArray(parsed));
  assert.deepEqual(parsed, response.structuredContent);
}

describe('fetchUrlToolHandler', () => {
  it('inserts spaces after inline links/code without touching fenced code blocks', () => {
    const input = [
      'Chakra UI relies on [next-themes](https://example.com)to add support.',
      '',
      'In most cases, it is set up in the `Provider`component.',
      '',
      '```js',
      'const x = "[next-themes](https://example.com)to";',
      'const y = "`Provider`component";',
      '```',
    ].join('\n');

    const cleaned = cleanupMarkdownArtifacts(input);

    // Prose lines gain the missing space...
    assert.ok(
      cleaned.includes(
        'Chakra UI relies on [next-themes](https://example.com) to add support.'
      )
    );
    assert.ok(
      cleaned.includes(
        'In most cases, it is set up in the `Provider` component.'
      )
    );
    // ...while the same patterns inside the fenced block stay untouched.
    assert.ok(
      cleaned.includes('const x = "[next-themes](https://example.com)to";')
    );
    assert.ok(cleaned.includes('const y = "`Provider`component";'));
  });

  it('returns a validation error when url is missing', async () => {
    const response = await fetchUrlToolHandler({ url: '' });

    assert.equal(response.isError, true);
    const parsed = JSON.parse((response.content[0] as { text: string })?.text);
    assert.deepEqual(parsed, {
      error: 'URL is required',
      code: 'VALIDATION_ERROR',
      url: '',
    });
  });

  it('returns markdown content for successful fetches', async (t) => {
    const html =
      '<html><head><title>Test Page</title></head><body><p>Hello</p></body></html>';

    t.mock.method(globalThis, 'fetch', async () => {
      return new Response(html, {
        status: 200,
        headers: { 'content-type': 'text/html' },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/test',
    });

    const structured = response.structuredContent;
    assert.ok(structured);
    assert.equal(structured.url, 'https://example.com/test');
    assert.equal(typeof structured.markdown, 'string');
    assert.equal(structured.fromCache, false);
    assert.equal(typeof structured.fetchedAt, 'string');
    assert.equal(typeof structured.contentSize, 'number');
    assert.equal(typeof structured.cacheResourceUri, 'string');
    assert.match(
      String(structured.cacheResourceUri),
      /^internal:\/\/cache\/markdown\/[a-f0-9.]+$/i
    );

    // The resource link block must point at the same cache URI.
    const resourceLink = response.content.find(
      (block) => block.type === 'resource_link'
    );
    assert.ok(resourceLink && resourceLink.type === 'resource_link');
    assert.equal(resourceLink.uri, structured.cacheResourceUri);
    assert.equal(resourceLink.mimeType, 'text/markdown');

    assert.ok((structured.markdown as string).includes('Hello'));
    assertTextBlockMatchesStructured(response);
  });

  it('returns extracted metadata fields when available', async (t) => {
    const html = ` <html> <head> <title>Metadata Title</title> <meta name="description" content="Metadata description" /> <meta name="author" content="Metadata Author" /> <meta property="article:published_time" content="2026-01-01T00:00:00Z" /> <meta property="article:modified_time" content="2026-01-02T00:00:00Z" /> </head> <body><main><p>Hello metadata</p></main></body> </html> `;

    t.mock.method(globalThis, 'fetch', async () => {
      return new Response(html, {
        status: 200,
        headers: { 'content-type': 'text/html' },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/metadata',
    });

    const structured = response.structuredContent;
    assert.ok(structured);

    const metadata = structured.metadata as
      | {
          title?: string;
          description?: string;
          author?: string;
          publishedAt?: string;
          modifiedAt?: string;
        }
      | undefined;

    assert.ok(metadata);
    assert.equal(metadata?.title, 'Metadata Title');
    assert.equal(metadata?.description, 'Metadata description');
    assert.equal(metadata?.author, 'Metadata Author');
    assert.equal(metadata?.publishedAt, '2026-01-01T00:00:00Z');
    assert.equal(metadata?.modifiedAt, '2026-01-02T00:00:00Z');
  });

  it('respects cancellation via the MCP request abort signal', async (t) => {
    // Abort before the handler runs so the signal is already aborted.
    const controller = new AbortController();
    controller.abort();

    t.mock.method(
      globalThis,
      'fetch',
      async (_url: RequestInfo | URL, init?: RequestInit) => {
        const signal = init?.signal as unknown;
        // Throw an AbortError only when an aborted signal reaches fetch.
        if (
          typeof signal === 'object' &&
          signal !== null &&
          'aborted' in signal &&
          (signal as { aborted?: unknown }).aborted === true
        ) {
          const err = new Error('This operation was aborted');
          err.name = 'AbortError';
          throw err;
        }

        return new Response('<html><body>ok</body></html>', {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      }
    );

    const url = 'https://example.com/cancelled-test';
    const response = await fetchUrlToolHandler(
      { url },
      { signal: controller.signal }
    );

    assert.equal(response.isError, true);
    const structured = JSON.parse(
      (response.content[0] as { text: string })?.text
    );
    assert.ok(structured);
    assert.equal(structured.url, url);
    assert.match(String(structured.error), /cancel|abort/i);
  });

  it('exposes truncated flag when inline content is trimmed', async (t) => {
    const originalCacheEnabled = config.cache.enabled;
    const originalInlineLimit = config.constants.maxInlineContentChars;
    config.cache.enabled = false;
    config.constants.maxInlineContentChars = 20000;

    // Body is longer than the inline limit configured above.
    const html = `<html><body><p>${'a'.repeat(21000)}</p></body></html>`;

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(html, {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/large',
      });

      const structured = response.structuredContent;
      assert.ok(structured);
      assert.equal(structured.truncated, true);
      assert.equal(structured.cacheResourceUri, undefined);
    } finally {
      // Restore the shared config so later tests see the original values.
      config.cache.enabled = originalCacheEnabled;
      config.constants.maxInlineContentChars = originalInlineLimit;
    }
  });

  it('exposes truncated flag when cached transform indicates truncation', async () => {
    const url = 'https://example.com/html-truncation';
    const normalizedUrl = normalizeUrl(url).normalizedUrl;
    const cacheKey = cache.createCacheKey('markdown', normalizedUrl);
    assert.ok(cacheKey);

    // Seed the cache with a payload already marked as truncated.
    cache.set(
      cacheKey,
      JSON.stringify({ markdown: 'cached content', truncated: true }),
      { url: normalizedUrl }
    );

    const response = await fetchUrlToolHandler({ url });

    const structured = response.structuredContent;
    assert.ok(structured);
    assert.equal(structured.fromCache, true);
    assert.equal(structured.truncated, true);
    assert.equal(typeof structured.fetchedAt, 'string');
    assert.equal(typeof structured.cacheResourceUri, 'string');
    assert.match(
      String(structured.cacheResourceUri),
      /^internal:\/\/cache\/markdown\/[a-f0-9.]+$/i
    );
  });

  it('adds truncation marker when HTML size truncation occurs', async (t) => {
    const originalCacheEnabled = config.cache.enabled;
    const originalInlineLimit = config.constants.maxInlineContentChars;
    const originalMaxHtmlSize = config.constants.maxHtmlSize;
    const originalMaxWorkerScale = config.transform.maxWorkerScale;
    config.cache.enabled = false;
    config.constants.maxInlineContentChars = 10000;
    config.constants.maxHtmlSize = 200;
    config.transform.maxWorkerScale = 0;
    await shutdownTransformWorkerPool();

    const html = `<html><body><p>${'a'.repeat(2000)}</p></body></html>`;

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(html, {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/html-size-truncate',
      });

      const structured = response.structuredContent;
      assert.ok(structured);
      assert.equal(structured.truncated, true);
      assert.ok(String(structured.markdown).includes('[truncated]'));
    } finally {
      await shutdownTransformWorkerPool();
      config.cache.enabled = originalCacheEnabled;
      config.constants.maxInlineContentChars = originalInlineLimit;
      config.constants.maxHtmlSize = originalMaxHtmlSize;
      config.transform.maxWorkerScale = originalMaxWorkerScale;
    }
  });

  it('includes truncation marker when fetch stage truncates', async (t) => {
    const originalCacheEnabled = config.cache.enabled;
    const originalInlineLimit = config.constants.maxInlineContentChars;
    const originalMaxHtmlSize = config.constants.maxHtmlSize;
    const originalFetchMax = config.fetcher.maxContentLength;
    config.cache.enabled = false;
    config.constants.maxInlineContentChars = 10000;
    config.constants.maxHtmlSize = 1000;
    config.fetcher.maxContentLength = 200;

    const html = `<html><body><p>${'a'.repeat(2000)}</p></body></html>`;

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(html, {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/fetch-size-truncate',
      });

      const structured = response.structuredContent;
      assert.ok(structured);
      assert.equal(structured.truncated, true);
      assert.ok(String(structured.markdown).includes('[truncated]'));
    } finally {
      config.cache.enabled = originalCacheEnabled;
      config.constants.maxInlineContentChars = originalInlineLimit;
      config.constants.maxHtmlSize = originalMaxHtmlSize;
      config.fetcher.maxContentLength = originalFetchMax;
    }
  });

  it('persists truncation marker in cached markdown payloads', async (t) => {
    const originalCacheEnabled = config.cache.enabled;
    const originalInlineLimit = config.constants.maxInlineContentChars;
    const originalMaxHtmlSize = config.constants.maxHtmlSize;
    const originalFetchMax = config.fetcher.maxContentLength;
    config.cache.enabled = true;
    config.constants.maxInlineContentChars = 10000;
    config.constants.maxHtmlSize = 1000;
    config.fetcher.maxContentLength = 200;

    const url = 'https://example.com/fetch-size-truncate-cache';
    const html = `<html><body><p>${'a'.repeat(2000)}</p></body></html>`;

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(html, {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      await fetchUrlToolHandler({ url });

      // Read the entry back from the cache and inspect the stored payload.
      const normalizedUrl = normalizeUrl(url).normalizedUrl;
      const cacheKey = cache.createCacheKey('markdown', normalizedUrl);
      assert.ok(cacheKey);

      const cachedEntry = cache.get(cacheKey);
      assert.ok(cachedEntry);

      const payload = JSON.parse(cachedEntry.content) as {
        markdown?: unknown;
        truncated?: unknown;
      };

      assert.equal(payload.truncated, true);
      assert.equal(typeof payload.markdown, 'string');
      assert.ok(String(payload.markdown).includes('...[truncated]'));
    } finally {
      config.cache.enabled = originalCacheEnabled;
      config.constants.maxInlineContentChars = originalInlineLimit;
      config.constants.maxHtmlSize = originalMaxHtmlSize;
      config.fetcher.maxContentLength = originalFetchMax;
    }
  });

  it('returns truncated markdown even when cache + http mode are enabled', async (t) => {
    const originalHttpMode = config.runtime.httpMode;
    const originalCacheEnabled = config.cache.enabled;
    const originalInlineLimit = config.constants.maxInlineContentChars;
    config.runtime.httpMode = true;
    config.cache.enabled = true;
    config.constants.maxInlineContentChars = 20000;

    const html = `<html><body><p>${'a'.repeat(25000)}</p></body></html>`;

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(html, {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/large-http',
      });

      const structured = response.structuredContent;
      assert.ok(structured);
      assert.equal(structured.truncated, true);
      assert.equal(typeof structured.markdown, 'string');
      assert.ok(String(structured.markdown).includes('[truncated]'));
      assertTextBlockMatchesStructured(response);

      const embeddedResource = response.content.find(
        (block) => block.type === 'resource'
      );
      assert.ok(
        embeddedResource,
        'Embedded resource should be emitted for markdown preview'
      );
    } finally {
      config.runtime.httpMode = originalHttpMode;
      config.cache.enabled = originalCacheEnabled;
      config.constants.maxInlineContentChars = originalInlineLimit;
    }
  });

  it('preserves anchor lists without a TOC heading', async (t) => {
    const html = ` <html> <body> <ul> <li><a href="#intro">Intro</a></li> <li><a href="#usage">Usage</a></li> </ul> </body> </html> `;

    t.mock.method(globalThis, 'fetch', async () => {
      return new Response(html, {
        status: 200,
        headers: { 'content-type': 'text/html' },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/anchors',
    });

    const structured = response.structuredContent;
    assert.ok(structured);
    const markdown = String(structured.markdown);
    assert.ok(markdown.includes('[Intro](#intro)'));
    assert.ok(markdown.includes('[Usage](#usage)'));
  });

  it('preserves raw markdown content even with inline HTML', async (t) => {
    const rawContent =
      '# Title\n\n<details><summary>More</summary>Details</details>';

    t.mock.method(globalThis, 'fetch', async () => {
      return new Response(rawContent, {
        status: 200,
        headers: { 'content-type': 'text/plain' },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/readme.md',
    });

    const structured = response.structuredContent;
    assert.ok(structured);
    const markdown = String(structured.markdown);
    assert.ok(markdown.includes('<details>'));
    assert.ok(markdown.includes('Source: https://example.com/readme.md'));
    assertTextBlockMatchesStructured(response);
  });

  it('falls back to UTF-8 for invalid charset labels in worker transforms', async (t) => {
    const html = '<html><body><p>café</p></body></html>';
    const utf8 = Buffer.from(html, 'utf-8');

    // Body is UTF-8 bytes, but the header advertises an unknown charset.
    t.mock.method(globalThis, 'fetch', async () => {
      return new Response(utf8, {
        status: 200,
        headers: {
          'content-type': 'text/html; charset=invalid-charset',
        },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/invalid-charset',
    });

    assert.equal(response.isError, undefined);
    const structured = response.structuredContent;
    assert.ok(structured);
    assert.equal(typeof structured.markdown, 'string');
    assert.match(String(structured.markdown), /café/);
    assertTextBlockMatchesStructured(response);
  });

  it('returns an error response when markdown conversion fails', async (t) => {
    // Ensure clean state
    await shutdownTransformWorkerPool();

    // Disable worker pool to force in-process transform which uses the mocked NodeHtmlMarkdown
    const originalMaxWorkerScale = config.transform.maxWorkerScale;
    config.transform.maxWorkerScale = 0;

    try {
      t.mock.method(NodeHtmlMarkdown.prototype, 'translate', () => {
        throw new Error('Translate failed');
      });

      t.mock.method(globalThis, 'fetch', async () => {
        return new Response('<html><body><p>Fail</p></body></html>', {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/convert-fail',
      });

      assert.equal(response.isError, true);
      const structured = JSON.parse(
        (response.content[0] as { text: string })?.text
      );
      assert.ok(structured);
      assert.match(String(structured.error), /convert|markdown/i);
    } finally {
      config.transform.maxWorkerScale = originalMaxWorkerScale;
    }
  });

  it('returns an error response when markdown conversion fails in worker pool', async (t) => {
    const originalMaxWorkerScale = config.transform.maxWorkerScale;
    const originalCacheEnabled = config.cache.enabled;
    config.transform.maxWorkerScale = 1;
    config.cache.enabled = false;
    await shutdownTransformWorkerPool();

    // Payload with NUL bytes, served as text/html.
    const binary = new Uint8Array([0x3c, 0x00, 0x3e, 0x00, 0x2f, 0x00]);

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(binary, {
          status: 200,
          headers: { 'content-type': 'text/html' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/worker-convert-fail',
      });

      assert.equal(response.isError, true);
      const structured = JSON.parse(
        (response.content[0] as { text: string })?.text
      );
      assert.ok(structured);
      assert.equal(structured.statusCode, 500);
      assert.match(String(structured.error), /binary/i);
    } finally {
      await shutdownTransformWorkerPool();
      config.transform.maxWorkerScale = originalMaxWorkerScale;
      config.cache.enabled = originalCacheEnabled;
    }
  });

  it('includes status code and details for fetch errors', async (t) => {
    t.mock.method(globalThis, 'fetch', async () => {
      return new Response('busy', {
        status: 429,
        headers: { 'retry-after': '30' },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/rate-limited',
    });

    assert.equal(response.isError, true);
    const structured = JSON.parse(
      (response.content[0] as { text: string })?.text
    );
    assert.ok(structured);
    assert.equal(structured.statusCode, 429);
    const details = structured.details as { retryAfter?: number } | undefined;
    assert.equal(details?.retryAfter, 30);
  });

  it('closes tilde code fences when truncating inline content', async (t) => {
    const originalInlineLimit = config.constants.maxInlineContentChars;
    config.constants.maxInlineContentChars = 20000;

    // Fenced body exceeds the inline limit, so the cut lands inside the fence.
    const longBody = 'a'.repeat(config.constants.maxInlineContentChars + 500);
    const rawContent = `# Title\n\n~~~\n${longBody}\n~~~\n`;

    try {
      t.mock.method(globalThis, 'fetch', async () => {
        return new Response(rawContent, {
          status: 200,
          headers: { 'content-type': 'text/plain' },
        });
      });

      const response = await fetchUrlToolHandler({
        url: 'https://example.com/tilde-fence',
      });

      const structured = response.structuredContent;
      assert.ok(structured);
      assert.equal(structured.truncated, true);
      const markdown = String(structured.markdown);
      // The fence must be closed with tildes, not backticks.
      assert.match(markdown, /~~~\n\.\.\.\[truncated\]/);
      assert.equal(markdown.includes('```\n...[truncated]'), false);
    } finally {
      config.constants.maxInlineContentChars = originalInlineLimit;
    }
  });

  it('converts html fragments even when markdown-like markers appear', async (t) => {
    const html = `<div>\n# Title\n</div>\n<p>Body</p>`;

    t.mock.method(globalThis, 'fetch', async () => {
      return new Response(html, {
        status: 200,
        headers: { 'content-type': 'text/plain' },
      });
    });

    const response = await fetchUrlToolHandler({
      url: 'https://example.com/html-fragment',
    });

    const structured = response.structuredContent;
    assert.ok(structured);
    const markdown = String(structured.markdown);
    assert.equal(markdown.includes('<div>'), false);
    assert.equal(markdown.includes('<p>'), false);
  });

  it('does not drop aggressive promo matches inside main content', () => {
    const originalAggressiveMode = config.noiseRemoval.aggressiveMode;
    config.noiseRemoval.aggressiveMode = true;

    const html = ` <html> <body> <main> <div class="related fixed"><p>KEEP_RELATED</p></div> </main> </body> </html> `;

    try {
      const markdown = htmlToMarkdown(html);
      // The underscore may or may not be backslash-escaped in the output.
      assert.match(markdown, /KEEP\\?_RELATED/);
    } finally {
      config.noiseRemoval.aggressiveMode = originalAggressiveMode;
    }
  });
});

describe('htmlToMarkdown resolves parenthesized URLs correctly', () => {
  it('preserves balanced parentheses in resolved relative URLs', () => {
    const html = '<a href="/wiki/Foo_(bar)">Foo (bar)</a>';
    const result = htmlToMarkdown(html, undefined, {
      url: 'https://en.wikipedia.org/wiki/Main_Page',
    });

    // Accept literal or percent-encoded forms of `_`, `(` and `)` in the URL.
    assert.match(
      result,
      /\[Foo \(bar\)\]\(https:\/\/en\.wikipedia\.org\/wiki\/Foo(?:_|%5F)(%28|\()bar(%29|\))\)/,
      'Link with parenthesized URL should resolve correctly'
    );
  });

  it('handles simple relative URLs without parentheses', () => {
    const html = '<a href="/about">About</a>';
    const result = htmlToMarkdown(html, undefined, {
      url: 'https://example.com/page',
    });

    assert.match(result, /\[About\]\(https:\/\/example\.com\/about\)/);
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/j0hanz/super-fetch-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

fetch-url-tool.test.ts•22.4 KiB