read-website-fast

Overview Schema Related Servers Score Discussions

fetchMarkdown.ts

fetchMarkdown.ts•4.36 KiB

import { fetch, CrawlOptions } from '@just-every/crawl';
import {
    extractMarkdownLinks,
    filterSameOriginLinks,
} from '../utils/extractMarkdownLinks.js';

/** Options accepted by {@link fetchMarkdown}. */
export interface FetchMarkdownOptions {
    /**
     * Declared for callers, but not read by `fetchMarkdown`: every request is
     * issued with `depth: 0` and multi-page crawling is driven by `maxPages`.
     */
    depth?: number;
    /** Forwarded to the crawler. Defaults to 3. */
    maxConcurrency?: number;
    /** Forwarded to the crawler. Defaults to true. */
    respectRobots?: boolean;
    /**
     * Forwarded to the crawler (defaults to true). Unless explicitly `false`,
     * links discovered in a page are also filtered through
     * `filterSameOriginLinks` before being queued.
     */
    sameOriginOnly?: boolean;
    /** Forwarded to the crawler unchanged. */
    userAgent?: string;
    /** Forwarded to the crawler. Defaults to '.cache'. */
    cacheDir?: string;
    /** Forwarded to the crawler. Defaults to 30000 (presumably milliseconds; confirm against the crawler). */
    timeout?: number;
    /** Maximum number of page results to collect. Defaults to 1. */
    maxPages?: number;
    /** When set, forwarded to the crawler as `cookiesFile`. */
    cookiesFile?: string;
}

/** Result returned by {@link fetchMarkdown}. */
export interface FetchMarkdownResult {
    /** Markdown of all collected pages combined; empty string on total failure. */
    markdown: string;
    /** Title of the first collected page. */
    title?: string;
    /** Links reported by the crawler for every collected page, flattened. */
    links?: string[];
    /** Set when nothing was fetched, when some pages failed, or when an exception was caught. */
    error?: string;
}

/**
 * Builds the per-request crawler options. A fresh object is created for every
 * request so the crawler never sees an object it has already been handed.
 */
function buildCrawlOptions(options: FetchMarkdownOptions): CrawlOptions {
    const crawlOptions: CrawlOptions = {
        depth: 0, // Always a single page; link following is handled by fetchMarkdown
        maxConcurrency: options.maxConcurrency ?? 3,
        respectRobots: options.respectRobots ?? true,
        sameOriginOnly: options.sameOriginOnly ?? true,
        userAgent: options.userAgent,
        cacheDir: options.cacheDir ?? '.cache',
        timeout: options.timeout ?? 30000,
    };

    if (options.cookiesFile) {
        // cookiesFile is not part of the CrawlOptions type as imported here.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        (crawlOptions as any).cookiesFile = options.cookiesFile;
    }

    return crawlOptions;
}

/**
 * Renders one crawled page for inclusion in the combined document.
 *
 * Failed pages become an HTML comment naming the URL and the error; successful
 * pages are prefixed with an HTML comment naming their source URL. Every page
 * after the first is preceded by a horizontal-rule separator.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function renderPage(result: any, index: number): string {
    if (result.error) {
        return `<!-- Error fetching ${result.url}: ${result.error} -->`;
    }

    let pageContent = '';

    // A separator is only needed between pages, i.e. before every page but the first
    if (index > 0) {
        pageContent += '\n\n---\n\n';
    }

    // Add source URL as a comment
    pageContent += `<!-- Source: ${result.url} -->\n`;

    // Add the content
    pageContent += result.markdown || '';

    return pageContent;
}

/**
 * Fetches `url` as markdown and, when `options.maxPages` is greater than 1,
 * keeps following links found in the fetched markdown (breadth-first, in
 * discovery order) until `maxPages` results have been collected or no links
 * remain.
 *
 * @param url - First page to fetch.
 * @param options - Crawl settings; see {@link FetchMarkdownOptions}.
 * @returns The combined markdown of all collected pages. This function does
 *   not throw: any exception is caught and reported through `error` with an
 *   empty `markdown`.
 */
export async function fetchMarkdown(
    url: string,
    options: FetchMarkdownOptions = {}
): Promise<FetchMarkdownResult> {
    try {
        const maxPages = options.maxPages ?? 1;
        const toVisit = [url];
        // Every URL that has ever been queued (visited or still pending), so
        // de-duplication is a constant-time lookup instead of an array scan.
        const seen = new Set<string>(toVisit);
        // Element type comes from the crawler package; kept loose on purpose.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const allResults: any[] = [];

        // If we want multiple pages, we need to crawl iteratively
        while (allResults.length < maxPages) {
            const currentUrl = toVisit.shift();
            if (currentUrl === undefined) break;

            // Fetch single page
            const results = await fetch(currentUrl, buildCrawlOptions(options));
            if (!results || results.length === 0) continue;

            const result = results[0];
            allResults.push(result);

            // Extract links from markdown only if we still need more pages
            if (allResults.length >= maxPages || !result.markdown) continue;

            const links = extractMarkdownLinks(result.markdown, currentUrl);
            const filteredLinks =
                options.sameOriginOnly !== false
                    ? filterSameOriginLinks(links, currentUrl)
                    : links;

            // Queue links that have not been queued before
            for (const link of filteredLinks) {
                if (!seen.has(link)) {
                    seen.add(link);
                    toVisit.push(link);
                }
            }
        }

        if (allResults.length === 0) {
            return {
                markdown: '',
                error: 'No results returned',
            };
        }

        // Combine all pages into a single markdown document
        const combinedMarkdown = allResults.map(renderPage).join('\n');
        const failedUrls = allResults.filter(r => r.error).map(r => r.url);

        return {
            markdown: combinedMarkdown,
            title: allResults[0].title,
            links: allResults.flatMap(r => r.links || []),
            error:
                failedUrls.length > 0
                    ? `Some pages had errors: ${failedUrls.join(', ')}`
                    : undefined,
        };
    } catch (error: unknown) {
        return {
            markdown: '',
            error: error instanceof Error ? error.message : 'Unknown error',
        };
    }
}

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/just-every/mcp-read-website-fast'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

fetchMarkdown.ts•4.36 KiB