Skip to main content
Glama

docs-mcp-server

html-pipeline-nonhtml-e2e.test.ts (2.91 kB)
/**
 * End-to-end tests for non-HTML content that must not be rendered with Playwright.
 *
 * text/plain and other non-HTML content types have to be handled without
 * triggering Playwright rendering, which can cause hangs.
 */
import { beforeAll, describe, expect, it } from "vitest";
import { FetchUrlTool } from "../src/tools/FetchUrlTool";
import { AutoDetectFetcher } from "../src/scraper/fetcher/AutoDetectFetcher";
import { ScrapeMode } from "../src/scraper/types";

// An HTML file that is served with Content-Type: text/plain; this URL was causing hangs.
const RAW_HTML_AS_TEXT_URL =
  "https://raw.githubusercontent.com/9001/copyparty/hovudstraum/contrib/index.html";

// robots.txt endpoint used as the text/plain sample for the remaining tests.
const ROBOTS_TXT_URL = "https://httpbin.org/robots.txt";

// Per-test timeouts in milliseconds (third argument to `it`).
const DEFAULT_TIMEOUT_MS = 30000;
const FETCH_MODE_TIMEOUT_MS = 15000;

describe("HTML Pipeline Non-HTML Content Tests", () => {
  let fetchUrlTool: FetchUrlTool;

  beforeAll(() => {
    fetchUrlTool = new FetchUrlTool(new AutoDetectFetcher());
  });

  describe("Plain Text Content", () => {
    it(
      "should handle text/plain content without hanging",
      async () => {
        const content = await fetchUrlTool.execute({
          url: RAW_HTML_AS_TEXT_URL,
          scrapeMode: ScrapeMode.Auto,
          followRedirects: true,
        });

        expect(content).toBeTruthy();
        expect(typeof content).toBe("string");
        expect(content.length).toBeGreaterThan(10);

        // The body is HTML markup, but it should come back processed as plain text.
        const lowered = content.toLowerCase();
        expect(lowered).toContain("html");
        expect(lowered).toContain("copyparty");
      },
      DEFAULT_TIMEOUT_MS,
    );

    it(
      "should handle robots.txt content (text/plain)",
      async () => {
        const content = await fetchUrlTool.execute({
          url: ROBOTS_TXT_URL,
          scrapeMode: ScrapeMode.Auto,
          followRedirects: true,
        });

        expect(content).toBeTruthy();
        expect(typeof content).toBe("string");
        expect(content.toLowerCase()).toContain("user-agent");
      },
      DEFAULT_TIMEOUT_MS,
    );
  });

  describe("Different Scrape Modes with Text Content", () => {
    // One entry per explicitly requested scrape mode; each keeps its own title and timeout.
    const modeCases = [
      {
        title: "should handle text/plain with ScrapeMode.Fetch",
        scrapeMode: ScrapeMode.Fetch,
        timeoutMs: FETCH_MODE_TIMEOUT_MS,
      },
      {
        title: "should handle text/plain with ScrapeMode.Playwright (should not hang)",
        scrapeMode: ScrapeMode.Playwright,
        timeoutMs: DEFAULT_TIMEOUT_MS,
      },
    ];

    for (const { title, scrapeMode, timeoutMs } of modeCases) {
      it(
        title,
        async () => {
          const content = await fetchUrlTool.execute({
            url: ROBOTS_TXT_URL,
            scrapeMode,
            followRedirects: true,
          });

          expect(content).toBeTruthy();
          expect(typeof content).toBe("string");
          expect(content.toLowerCase()).toContain("user-agent");
        },
        timeoutMs,
      );
    }
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.