Skip to main content
Glama

docs-mcp-server

html-pipeline-basic-e2e.test.ts (5.23 kB)
/**
 * Basic end-to-end tests for HTML pipeline functionality.
 *
 * These tests validate core functionality using reliable test endpoints (httpbin.org)
 * and are designed to be fast and stable for CI/CD environments. They test the
 * fundamental capabilities without relying on external websites that might change.
 *
 * For comprehensive real-world website testing, see html-pipeline-websites-e2e.test.ts
 */
import { beforeAll, describe, expect, it } from "vitest";
import { FetchUrlTool } from "../src/tools/FetchUrlTool";
import { AutoDetectFetcher } from "../src/scraper/fetcher/AutoDetectFetcher";
import { ScrapeMode } from "../src/scraper/types";

/** Per-test timeout shared by most cases in this suite. */
const DEFAULT_TIMEOUT_MS = 30000;

/**
 * Builds the option set most tests in this file pass to `FetchUrlTool.execute`:
 * automatic scrape mode with redirect following enabled.
 */
const autoOptions = (url: string) => ({
  url,
  scrapeMode: ScrapeMode.Auto,
  followRedirects: true,
});

/** Asserts that a fetch result is truthy and of type string. */
const expectTextResult = (value: unknown): void => {
  expect(value).toBeTruthy();
  expect(typeof value).toBe("string");
};

describe("HTML Pipeline Basic Tests", () => {
  let fetchUrlTool: FetchUrlTool;

  beforeAll(() => {
    fetchUrlTool = new FetchUrlTool(new AutoDetectFetcher());
  });

  /** Runs the shared tool instance against `url` with the common auto-mode options. */
  const fetchAuto = (url: string) => fetchUrlTool.execute(autoOptions(url));

  describe("Core Functionality", () => {
    it(
      "should fetch and process a simple HTTP page",
      async () => {
        // httpbin.org/html is a stable endpoint that serves Moby Dick content.
        const content = await fetchAuto("https://httpbin.org/html");

        expectTextResult(content);
        expect(content.length).toBeGreaterThan(10);
        // Identify the page by its author rather than by exact markup.
        expect(content.toLowerCase()).toContain("herman melville");
      },
      DEFAULT_TIMEOUT_MS,
    );

    it(
      "should process JSON content gracefully",
      async () => {
        const content = await fetchAuto("https://httpbin.org/json");

        expectTextResult(content);
        // The JSON payload is expected to come back as text.
        expect(content).toContain("{");
      },
      DEFAULT_TIMEOUT_MS,
    );

    it(
      "should handle redirects",
      async () => {
        const content = await fetchAuto("https://httpbin.org/redirect/1");

        expectTextResult(content);
      },
      DEFAULT_TIMEOUT_MS,
    );
  });

  describe("Error Handling", () => {
    it(
      "should handle 404 errors",
      async () => {
        const pending = fetchAuto("https://httpbin.org/status/404");

        await expect(pending).rejects.toThrow();
      },
      DEFAULT_TIMEOUT_MS,
    );

    it(
      "should handle malformed URLs",
      async () => {
        const pending = fetchAuto("not-a-url");

        await expect(pending).rejects.toThrow();
      },
      10000,
    );

    it(
      "should handle non-existent domains",
      async () => {
        const pending = fetchAuto(
          "https://this-domain-definitely-does-not-exist-12345.com",
        );

        await expect(pending).rejects.toThrow();
      },
      DEFAULT_TIMEOUT_MS,
    );
  });

  describe("Different Content Types", () => {
    it(
      "should handle XML content",
      async () => {
        const content = await fetchAuto("https://httpbin.org/xml");

        expectTextResult(content);
        expect(content).toContain("<");
      },
      DEFAULT_TIMEOUT_MS,
    );

    it(
      "should handle robots.txt files",
      async () => {
        const content = await fetchAuto("https://httpbin.org/robots.txt");

        expectTextResult(content);
        expect(content.toLowerCase()).toContain("user-agent");
      },
      DEFAULT_TIMEOUT_MS,
    );
  });

  describe("Custom Headers", () => {
    it(
      "should respect custom headers",
      async () => {
        const content = await fetchUrlTool.execute({
          ...autoOptions("https://httpbin.org/headers"),
          headers: {
            "X-Test-Header": "test-value",
          },
        });

        expectTextResult(content);
        // The response body should include the custom header we sent.
        expect(content).toContain("X-Test-Header");
        expect(content).toContain("test-value");
      },
      DEFAULT_TIMEOUT_MS,
    );
  });

  describe("Performance Validation", () => {
    it(
      "should complete simple requests within reasonable time",
      async () => {
        const startedAt = Date.now();

        await fetchUrlTool.execute({
          url: "https://httpbin.org/html",
          scrapeMode: ScrapeMode.Fetch, // Use faster mode for performance test
          followRedirects: true,
        });

        const elapsedMs = Date.now() - startedAt;

        // A simple page should complete within 10 seconds.
        expect(elapsedMs).toBeLessThan(10000);
      },
      15000,
    );
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.