Skip to main content
Glama

docs-mcp-server

HttpFetcher.ts9.35 kB
import axios, { type AxiosError, type AxiosRequestConfig } from "axios"; import { CancellationError } from "../../pipeline/errors"; import { analytics, extractHostname, extractProtocol } from "../../telemetry"; import { FETCHER_BASE_DELAY, FETCHER_MAX_RETRIES } from "../../utils/config"; import { ChallengeError, RedirectError, ScraperError } from "../../utils/errors"; import { logger } from "../../utils/logger"; import { MimeTypeUtils } from "../../utils/mimeTypeUtils"; import { FingerprintGenerator } from "./FingerprintGenerator"; import type { ContentFetcher, FetchOptions, RawContent } from "./types"; /** * Fetches content from remote sources using HTTP/HTTPS. */ export class HttpFetcher implements ContentFetcher { private readonly retryableStatusCodes = [ 408, // Request Timeout 429, // Too Many Requests 500, // Internal Server Error 502, // Bad Gateway 503, // Service Unavailable 504, // Gateway Timeout 525, // SSL Handshake Failed (Cloudflare specific) ]; private readonly nonRetryableErrorCodes = [ "ENOTFOUND", // DNS resolution failed - domain doesn't exist "ECONNREFUSED", // Connection refused - service not running "ENOENT", // No such file or directory "EACCES", // Permission denied "EINVAL", // Invalid argument "EMFILE", // Too many open files "ENFILE", // File table overflow "EPERM", // Operation not permitted ]; private fingerprintGenerator: FingerprintGenerator; constructor() { this.fingerprintGenerator = new FingerprintGenerator(); } canFetch(source: string): boolean { return source.startsWith("http://") || source.startsWith("https://"); } private async delay(ms: number): Promise<void> { return new Promise((resolve) => setTimeout(resolve, ms)); } async fetch(source: string, options?: FetchOptions): Promise<RawContent> { const startTime = performance.now(); const maxRetries = options?.maxRetries ?? FETCHER_MAX_RETRIES; const baseDelay = options?.retryDelay ?? FETCHER_BASE_DELAY; // Default to following redirects if not specified const followRedirects = options?.followRedirects ?? true; try { const result = await this.performFetch( source, options, maxRetries, baseDelay, followRedirects, ); // Track successful HTTP request const duration = performance.now() - startTime; analytics.track("http_request_completed", { success: true, hostname: extractHostname(source), protocol: extractProtocol(source), durationMs: Math.round(duration), contentSizeBytes: result.content.length, mimeType: result.mimeType, hasEncoding: !!result.encoding, followRedirects: followRedirects, hadRedirects: result.source !== source, }); return result; } catch (error) { // Track failed HTTP request const duration = performance.now() - startTime; const axiosError = error as AxiosError; const status = axiosError.response?.status; analytics.track("http_request_completed", { success: false, hostname: extractHostname(source), protocol: extractProtocol(source), durationMs: Math.round(duration), statusCode: status, errorType: error instanceof CancellationError ? "cancellation" : error instanceof RedirectError ? "redirect" : error instanceof ScraperError ? "scraper" : "unknown", errorCode: axiosError.code, followRedirects: followRedirects, }); throw error; } } private async performFetch( source: string, options?: FetchOptions, maxRetries = FETCHER_MAX_RETRIES, baseDelay = FETCHER_BASE_DELAY, followRedirects = true, ): Promise<RawContent> { for (let attempt = 0; attempt <= maxRetries; attempt++) { try { const fingerprint = this.fingerprintGenerator.generateHeaders(); const headers = { ...fingerprint, ...options?.headers, // User-provided headers override generated ones }; const config: AxiosRequestConfig = { responseType: "arraybuffer", headers: { ...headers, // Override Accept-Encoding to exclude zstd which Axios doesn't handle automatically // This prevents servers from sending zstd-compressed content that would appear as binary garbage "Accept-Encoding": "gzip, deflate, br", }, timeout: options?.timeout, signal: options?.signal, // Pass signal to axios // Axios follows redirects by default, we need to explicitly disable it if needed maxRedirects: followRedirects ? 5 : 0, decompress: true, }; const response = await axios.get(source, config); const contentTypeHeader = response.headers["content-type"]; const { mimeType, charset } = MimeTypeUtils.parseContentType(contentTypeHeader); const contentEncoding = response.headers["content-encoding"]; // Convert ArrayBuffer to Buffer properly let content: Buffer; if (response.data instanceof ArrayBuffer) { content = Buffer.from(response.data); } else if (Buffer.isBuffer(response.data)) { content = response.data; } else if (typeof response.data === "string") { content = Buffer.from(response.data, "utf-8"); } else { // Fallback for other data types content = Buffer.from(response.data); } // Determine the final effective URL after redirects (if any) const finalUrl = // Node follow-redirects style response.request?.res?.responseUrl || // Some adapters may expose directly response.request?.responseUrl || // Fallback to axios recorded config URL response.config?.url || source; return { content, mimeType, charset, encoding: contentEncoding, source: finalUrl, } satisfies RawContent; } catch (error: unknown) { const axiosError = error as AxiosError; const status = axiosError.response?.status; const code = axiosError.code; // Handle abort/cancel: do not retry, throw CancellationError if (options?.signal?.aborted || code === "ERR_CANCELED") { // Throw with isError = false to indicate cancellation is not an error throw new CancellationError("HTTP fetch cancelled"); } // Handle redirect errors (status codes 301, 302, 303, 307, 308) if (!followRedirects && status && status >= 300 && status < 400) { const location = axiosError.response?.headers?.location; if (location) { throw new RedirectError(source, location, status); } } // Detect Cloudflare challenges if (status === 403) { const cfMitigated = axiosError.response?.headers?.["cf-mitigated"]; const server = axiosError.response?.headers?.server; let responseBody = ""; // Safely convert response data to string if (axiosError.response?.data) { try { if (typeof axiosError.response.data === "string") { responseBody = axiosError.response.data; } else if (Buffer.isBuffer(axiosError.response.data)) { responseBody = axiosError.response.data.toString("utf-8"); } else if (axiosError.response.data instanceof ArrayBuffer) { responseBody = Buffer.from(axiosError.response.data).toString("utf-8"); } } catch { // Ignore conversion errors } } // Check for various Cloudflare challenge indicators const isCloudflareChallenge = cfMitigated === "challenge" || server === "cloudflare" || responseBody.includes("Enable JavaScript and cookies to continue") || responseBody.includes("Just a moment...") || responseBody.includes("cf_chl_opt"); if (isCloudflareChallenge) { throw new ChallengeError(source, status, "cloudflare"); } } if ( attempt < maxRetries && (status === undefined || this.retryableStatusCodes.includes(status)) && !this.nonRetryableErrorCodes.includes(code ?? "") ) { const delay = baseDelay * 2 ** attempt; logger.warn( `⚠️ Attempt ${attempt + 1}/${ maxRetries + 1 } failed for ${source} (Status: ${status}, Code: ${code}). Retrying in ${delay}ms...`, ); await this.delay(delay); continue; } // Not a 5xx error or max retries reached throw new ScraperError( `Failed to fetch ${source} after ${ attempt + 1 } attempts: ${axiosError.message ?? "Unknown error"}`, true, error instanceof Error ? error : undefined, ); } } throw new ScraperError( `Failed to fetch ${source} after ${maxRetries + 1} attempts`, true, ); } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server