Skip to main content
Glama

docs-mcp-server

HttpFetcher.test.ts (20.6 kB)
/**
 * Unit tests for HttpFetcher.
 *
 * `axios` and the project logger are replaced with Vitest auto-mocks, so no
 * real network traffic is produced. Every test drives `mockedAxios.get` to
 * simulate a response or a failure and then asserts on what
 * `HttpFetcher.fetch()` returns/throws and on how `axios.get` was called.
 */
import { beforeEach, describe, expect, it, vi } from "vitest";
import { CancellationError } from "../../pipeline/errors";
import { RedirectError, ScraperError } from "../../utils/errors";

vi.mock("axios");
vi.mock("../../utils/logger");

import axios from "axios";

const mockedAxios = vi.mocked(axios, true);

import { HttpFetcher } from "./HttpFetcher";

describe("HttpFetcher", () => {
  // Drop queued mock results and recorded calls so tests cannot leak into each other.
  beforeEach(() => {
    mockedAxios.get.mockReset();
  });

  describe("canFetch", () => {
    it("should return true for HTTP URLs", () => {
      const fetcher = new HttpFetcher();
      expect(fetcher.canFetch("http://example.com")).toBe(true);
      expect(fetcher.canFetch("https://example.com")).toBe(true);
    });

    it("should return false for non-HTTP URLs", () => {
      const fetcher = new HttpFetcher();
      expect(fetcher.canFetch("ftp://example.com")).toBe(false);
      expect(fetcher.canFetch("file:///path/to/file")).toBe(false);
      expect(fetcher.canFetch("mailto:test@example.com")).toBe(false);
      expect(fetcher.canFetch("relative/path")).toBe(false);
    });
  });

  describe("data type handling", () => {
    it("should handle ArrayBuffer response data", async () => {
      const fetcher = new HttpFetcher();
      const textContent = "Hello World";
      const arrayBuffer = new TextEncoder().encode(textContent).buffer;
      const mockResponse = {
        data: arrayBuffer,
        headers: { "content-type": "text/plain" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      const result = await fetcher.fetch("https://example.com");

      expect(result.content).toEqual(Buffer.from(textContent, "utf-8"));
    });

    it("should handle string response data", async () => {
      const fetcher = new HttpFetcher();
      const textContent = "Hello World";
      const mockResponse = {
        data: textContent,
        headers: { "content-type": "text/plain" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      const result = await fetcher.fetch("https://example.com");

      expect(result.content).toEqual(Buffer.from(textContent, "utf-8"));
    });

    it("should handle other data types as fallback", async () => {
      const fetcher = new HttpFetcher();
      // Use an array instead of object to avoid Buffer.from() issues
      const arrayData = [1, 2, 3];
      const mockResponse = {
        data: arrayData,
        headers: { "content-type": "application/json" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      const result = await fetcher.fetch("https://example.com");

      expect(result.content).toBeInstanceOf(Buffer);
      expect(result.mimeType).toBe("application/json");
    });
  });

  describe("cancellation", () => {
    it("should throw CancellationError when signal is aborted", async () => {
      const fetcher = new HttpFetcher();
      const abortController = new AbortController();
      abortController.abort();
      mockedAxios.get.mockRejectedValue({ code: "ERR_CANCELED" });

      await expect(
        fetcher.fetch("https://example.com", { signal: abortController.signal }),
      ).rejects.toBeInstanceOf(CancellationError);
    });

    it("should throw CancellationError when axios returns ERR_CANCELED", async () => {
      const fetcher = new HttpFetcher();
      mockedAxios.get.mockRejectedValue({ code: "ERR_CANCELED" });

      await expect(fetcher.fetch("https://example.com")).rejects.toBeInstanceOf(
        CancellationError,
      );
    });
  });

  describe("error handling edge cases", () => {
    it("should handle network errors without response object", async () => {
      const fetcher = new HttpFetcher();
      const networkError = new Error("Network Error");
      mockedAxios.get.mockRejectedValue(networkError);

      await expect(
        fetcher.fetch("https://example.com", { maxRetries: 0 }),
      ).rejects.toThrow(ScraperError);
      expect(mockedAxios.get).toHaveBeenCalledTimes(1);
    });

    it("should handle redirects without location header when followRedirects is false", async () => {
      const fetcher = new HttpFetcher();
      mockedAxios.get.mockRejectedValue({
        response: {
          status: 301,
          headers: {}, // No location header
        },
      });

      // Should not throw RedirectError without location, should retry or throw ScraperError
      await expect(
        fetcher.fetch("https://example.com", { followRedirects: false, maxRetries: 0 }),
      ).rejects.toThrow(ScraperError);
    });
  });

  describe("configuration defaults", () => {
    it("should use default max retries when not specified", async () => {
      const fetcher = new HttpFetcher();
      // Mock failure for all attempts - use a retryable error
      mockedAxios.get.mockRejectedValue({ response: { status: 500 } });

      await expect(
        fetcher.fetch("https://example.com", {
          retryDelay: 1, // Minimal delay for fast test
          maxRetries: undefined, // Explicitly test default
        }),
      ).rejects.toThrow(ScraperError);

      // Should call initial attempt + 6 retries (default FETCHER_MAX_RETRIES = 6)
      expect(mockedAxios.get).toHaveBeenCalledTimes(7);
    });

    it("should respect custom maxRetries option", async () => {
      const fetcher = new HttpFetcher();
      mockedAxios.get.mockRejectedValue({ response: { status: 500 } });

      await expect(
        fetcher.fetch("https://example.com", {
          maxRetries: 2,
          retryDelay: 1,
        }),
      ).rejects.toThrow(ScraperError);

      // Should call initial attempt + 2 custom retries
      expect(mockedAxios.get).toHaveBeenCalledTimes(3);
    });

    it("should pass timeout option to axios", async () => {
      const fetcher = new HttpFetcher();
      const mockResponse = {
        data: Buffer.from("test", "utf-8"),
        headers: { "content-type": "text/plain" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      await fetcher.fetch("https://example.com", { timeout: 5000 });

      expect(mockedAxios.get).toHaveBeenCalledWith(
        "https://example.com",
        expect.objectContaining({
          timeout: 5000,
        }),
      );
    });
  });

  it("should fetch content successfully", async () => {
    const fetcher = new HttpFetcher();
    const htmlContent = "<html><body><h1>Hello</h1></body></html>";
    const mockResponse = {
      data: Buffer.from(htmlContent, "utf-8"), // HttpFetcher expects buffer from axios
      headers: { "content-type": "text/html; charset=utf-8" },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    const result = await fetcher.fetch("https://example.com");

    expect(result.content).toEqual(Buffer.from(htmlContent, "utf-8"));
    expect(result.mimeType).toBe("text/html");
    expect(result.charset).toBe("utf-8");
    expect(result.source).toBe("https://example.com");
  });

  it("should extract charset from content-type header", async () => {
    const fetcher = new HttpFetcher();
    const textContent = "abc";
    const mockResponse = {
      data: Buffer.from(textContent, "utf-8"),
      headers: { "content-type": "text/plain; charset=iso-8859-1" },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    const result = await fetcher.fetch("https://example.com/file.txt");

    expect(result.mimeType).toBe("text/plain");
    expect(result.charset).toBe("iso-8859-1");
  });

  it("should set charset undefined if not present in content-type", async () => {
    const fetcher = new HttpFetcher();
    const textContent = "abc";
    const mockResponse = {
      data: Buffer.from(textContent, "utf-8"),
      headers: { "content-type": "text/plain" },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    const result = await fetcher.fetch("https://example.com/file.txt");

    expect(result.mimeType).toBe("text/plain");
    expect(result.charset).toBeUndefined();
  });

  it("should extract encoding from content-encoding header", async () => {
    const fetcher = new HttpFetcher();
    const textContent = "abc";
    const mockResponse = {
      data: Buffer.from(textContent, "utf-8"),
      headers: {
        "content-type": "text/plain; charset=utf-8",
        "content-encoding": "gzip",
      },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    const result = await fetcher.fetch("https://example.com/file.txt");

    expect(result.encoding).toBe("gzip");
    expect(result.mimeType).toBe("text/plain");
    expect(result.charset).toBe("utf-8");
  });

  it("should default mimeType to application/octet-stream if content-type header is missing", async () => {
    const fetcher = new HttpFetcher();
    const mockResponse = {
      data: Buffer.from([1, 2, 3]),
      headers: {},
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    const result = await fetcher.fetch("https://example.com/file.bin");

    expect(result.mimeType).toBe("application/octet-stream");
    expect(result.charset).toBeUndefined();
  });

  it("should handle different content types", async () => {
    const fetcher = new HttpFetcher();
    const mockResponse = {
      data: Buffer.from([0x89, 0x50, 0x4e, 0x47]),
      headers: { "content-type": "image/png" },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    const result = await fetcher.fetch("https://example.com/image.png");

    expect(result.content).toEqual(mockResponse.data);
    expect(result.mimeType).toBe("image/png");
  });

  describe("retry logic", () => {
    it("should retry on all retryable HTTP status codes", async () => {
      const fetcher = new HttpFetcher();
      // Test all retryable status codes from HttpFetcher: 408, 429, 500, 502, 503, 504, 525
      const retryableStatuses = [408, 429, 500, 502, 503, 504, 525];

      for (const status of retryableStatuses) {
        // Fresh mock per status so the call count below is per-iteration.
        mockedAxios.get.mockReset();
        mockedAxios.get.mockRejectedValueOnce({ response: { status } });
        mockedAxios.get.mockResolvedValueOnce({
          data: Buffer.from("success", "utf-8"),
          headers: { "content-type": "text/plain" },
        });

        const result = await fetcher.fetch("https://example.com", {
          maxRetries: 1,
          retryDelay: 1,
        });

        expect(result.content).toEqual(Buffer.from("success", "utf-8"));
        expect(mockedAxios.get).toHaveBeenCalledTimes(2); // Initial + 1 retry
      }
    });

    it("should not retry on non-retryable HTTP status codes", async () => {
      const fetcher = new HttpFetcher();
      // Test various non-retryable status codes
      const nonRetryableStatuses = [400, 401, 403, 404, 405, 410];

      for (const status of nonRetryableStatuses) {
        // Fresh mock per status so the call count below is per-iteration.
        mockedAxios.get.mockReset();
        mockedAxios.get.mockRejectedValue({ response: { status } });

        await expect(
          fetcher.fetch("https://example.com", {
            maxRetries: 2,
            retryDelay: 1,
          }),
        ).rejects.toThrow(ScraperError);
        expect(mockedAxios.get).toHaveBeenCalledTimes(1); // No retries
      }
    });

    it("should retry on undefined status (network errors)", async () => {
      const fetcher = new HttpFetcher();
      // Simulate network error without response object
      mockedAxios.get.mockRejectedValueOnce(new Error("Network timeout"));
      mockedAxios.get.mockResolvedValueOnce({
        data: Buffer.from("recovered", "utf-8"),
        headers: { "content-type": "text/plain" },
      });

      const result = await fetcher.fetch("https://example.com", {
        maxRetries: 1,
        retryDelay: 1,
      });

      expect(result.content).toEqual(Buffer.from("recovered", "utf-8"));
      expect(mockedAxios.get).toHaveBeenCalledTimes(2);
    });

    it("should use exponential backoff for retry delays", async () => {
      const fetcher = new HttpFetcher();
      // Spy on setTimeout to observe the requested delays. The spy has no mock
      // implementation, so it calls through and the (short) delays really elapse.
      const setTimeoutSpy = vi.spyOn(global, "setTimeout");

      try {
        // Mock all retries to fail, then succeed
        mockedAxios.get.mockRejectedValueOnce({ response: { status: 500 } });
        mockedAxios.get.mockRejectedValueOnce({ response: { status: 500 } });
        mockedAxios.get.mockRejectedValueOnce({ response: { status: 500 } });
        mockedAxios.get.mockResolvedValueOnce({
          data: Buffer.from("success", "utf-8"),
          headers: { "content-type": "text/plain" },
        });

        // Execute fetch with base delay of 10ms
        const baseDelay = 10;
        await fetcher.fetch("https://example.com", {
          maxRetries: 3,
          retryDelay: baseDelay,
        });

        // Verify exponential backoff: baseDelay * 2^attempt
        // Attempt 0: 10ms, Attempt 1: 20ms, Attempt 2: 40ms
        expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 10);
        expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 20);
        expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 40);
      } finally {
        // Restore even when an assertion above fails, so the global setTimeout
        // spy never leaks into subsequent tests.
        setTimeoutSpy.mockRestore();
      }
    });
  });

  it("should not retry on unretryable HTTP errors", async () => {
    const fetcher = new HttpFetcher();
    mockedAxios.get.mockRejectedValue({ response: { status: 404 } });

    await expect(
      fetcher.fetch("https://example.com", {
        retryDelay: 1, // Use minimal delay
      }),
    ).rejects.toThrow(ScraperError);
    expect(mockedAxios.get).toHaveBeenCalledTimes(1);
  });

  it("should retry on retryable HTTP errors", async () => {
    const fetcher = new HttpFetcher();
    const retryableErrors = [429, 500, 503];
    for (const status of retryableErrors) {
      mockedAxios.get.mockRejectedValueOnce({ response: { status } });
    }

    const htmlContent = "<html><body><h1>Hello</h1></body></html>";
    mockedAxios.get.mockResolvedValueOnce({
      data: Buffer.from(htmlContent, "utf-8"),
      headers: { "content-type": "text/html" },
    });

    // Test behavior: retry mechanism should eventually succeed
    const result = await fetcher.fetch("https://example.com", {
      retryDelay: 1, // Use minimal delay to speed up test
      maxRetries: 3,
    });

    expect(mockedAxios.get).toHaveBeenCalledTimes(retryableErrors.length + 1);
    expect(result.content).toEqual(Buffer.from(htmlContent, "utf-8"));
  });

  it("should throw error after max retries", async () => {
    const fetcher = new HttpFetcher();
    const maxRetries = 2; // Use smaller number for faster test
    mockedAxios.get.mockRejectedValue({ response: { status: 502 } });

    await expect(
      fetcher.fetch("https://example.com", {
        maxRetries: maxRetries,
        retryDelay: 1, // Use minimal delay
      }),
    ).rejects.toThrow(ScraperError);
    expect(mockedAxios.get).toHaveBeenCalledTimes(maxRetries + 1);
  });

  it("should generate fingerprint headers", async () => {
    const fetcher = new HttpFetcher();
    const mockResponse = {
      data: Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
      headers: { "content-type": "text/html" },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);

    await fetcher.fetch("https://example.com");

    // Test behavior: verify that axios is called with required properties
    expect(mockedAxios.get).toHaveBeenCalledWith("https://example.com", {
      responseType: "arraybuffer",
      headers: expect.objectContaining({
        "user-agent": expect.any(String),
        accept: expect.any(String),
        "accept-language": expect.any(String),
        // Verify that our custom Accept-Encoding header is set (excluding zstd)
        "Accept-Encoding": "gzip, deflate, br",
      }),
      timeout: undefined,
      maxRedirects: 5,
      signal: undefined,
      decompress: true,
    });
  });

  it("should respect custom headers", async () => {
    const fetcher = new HttpFetcher();
    const mockResponse = {
      data: Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
      headers: { "content-type": "text/html" },
    };
    mockedAxios.get.mockResolvedValue(mockResponse);
    const headers = { "X-Custom-Header": "value" };

    await fetcher.fetch("https://example.com", { headers });

    // Test behavior: verify custom headers are included
    expect(mockedAxios.get).toHaveBeenCalledWith("https://example.com", {
      responseType: "arraybuffer",
      headers: expect.objectContaining(headers),
      timeout: undefined,
      maxRedirects: 5,
      signal: undefined,
      decompress: true,
    });
  });

  describe("redirect handling", () => {
    it("should follow redirects by default", async () => {
      const fetcher = new HttpFetcher();
      const mockResponse = {
        data: Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
        headers: { "content-type": "text/html" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      const result = await fetcher.fetch("https://example.com");

      // Test behavior: verify result is correct and redirects are allowed
      expect(result.content).toEqual(
        Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
      );
      expect(mockedAxios.get).toHaveBeenCalledWith(
        "https://example.com",
        expect.objectContaining({
          maxRedirects: 5, // Should allow redirects by default
        }),
      );
    });

    it("should follow redirects when followRedirects is true", async () => {
      const fetcher = new HttpFetcher();
      const mockResponse = {
        data: Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
        headers: { "content-type": "text/html" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      const result = await fetcher.fetch("https://example.com", {
        followRedirects: true,
      });

      // Test behavior: verify result is correct and redirects are allowed
      expect(result.content).toEqual(
        Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
      );
      expect(mockedAxios.get).toHaveBeenCalledWith(
        "https://example.com",
        expect.objectContaining({
          maxRedirects: 5, // Should allow redirects
        }),
      );
    });

    it("should not follow redirects when followRedirects is false", async () => {
      const fetcher = new HttpFetcher();
      const mockResponse = {
        data: Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
        headers: { "content-type": "text/html" },
      };
      mockedAxios.get.mockResolvedValue(mockResponse);

      const result = await fetcher.fetch("https://example.com", {
        followRedirects: false,
      });

      // Test behavior: verify result is correct and redirects are disabled
      expect(result.content).toEqual(
        Buffer.from("<html><body><h1>Hello</h1></body></html>", "utf-8"),
      );
      expect(mockedAxios.get).toHaveBeenCalledWith(
        "https://example.com",
        expect.objectContaining({
          maxRedirects: 0, // Should not allow redirects
        }),
      );
    });

    it("should throw RedirectError when a redirect is encountered and followRedirects is false", async () => {
      const fetcher = new HttpFetcher();
      const redirectError = {
        response: {
          status: 301,
          headers: {
            location: "https://new-example.com",
          },
        },
      };
      // Persistent rejection: both fetch calls below observe the same redirect.
      mockedAxios.get.mockRejectedValue(redirectError);

      await expect(
        fetcher.fetch("https://example.com", { followRedirects: false }),
      ).rejects.toBeInstanceOf(RedirectError);

      await expect(
        fetcher.fetch("https://example.com", { followRedirects: false }),
      ).rejects.toMatchObject({
        originalUrl: "https://example.com",
        redirectUrl: "https://new-example.com",
        statusCode: 301,
      });
    });

    it("should expose final redirect URL as source (canonical trailing slash + query)", async () => {
      const fetcher = new HttpFetcher();
      const original = "https://learn.microsoft.com/en-us/azure/bot-service";
      const finalUrl = `${original}/?view=azure-bot-service-4.0`;

      // Simulate axios response object after redirects (follow-redirects style)
      mockedAxios.get.mockResolvedValue({
        data: Buffer.from("<html><body>OK</body></html>", "utf-8"),
        headers: { "content-type": "text/html" },
        request: { res: { responseUrl: finalUrl } },
        config: { url: finalUrl },
      });

      const result = await fetcher.fetch(original);

      // Expected to FAIL before implementation change (currently returns original)
      expect(result.source).toBe(finalUrl);
    });
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.