Superglue MCP

Official

Overview Schema Related Servers Score Discussions

superglue
packages
core
documentation

documentation.test.ts•46.3 KiB

import playwright from "@playwright/test"; import { ServiceMetadata } from "@superglue/shared"; import axios from "axios"; import { afterEach, beforeEach, describe, expect, it, Mocked, vi } from "vitest"; import { server_defaults } from "../default.js"; import { DocumentationFetcher } from "./documentation-fetching.js"; import { PlaywrightFetchingStrategy } from "./strategies/index.js"; import { DocumentationSearch } from "./documentation-search.js"; // Mock playwright and axios vi.mock("@playwright/test", async (importOriginal) => { const original = (await importOriginal()) as any; return { ...original, // Preserve other exports if any default: { chromium: { launch: vi.fn(), }, }, }; }); vi.mock("axios"); // Helper to create standard Playwright mocks const createPlaywrightMocks = () => { const mockPage = { goto: vi.fn().mockResolvedValue(undefined), waitForLoadState: vi.fn().mockResolvedValue(undefined), waitForTimeout: vi.fn().mockResolvedValue(undefined), addInitScript: vi.fn().mockResolvedValue(undefined), content: vi.fn().mockResolvedValue(""), evaluate: vi.fn().mockResolvedValue(undefined), close: vi.fn().mockResolvedValue(undefined), }; const mockContext = { newPage: vi.fn().mockResolvedValue(mockPage), close: vi.fn().mockResolvedValue(undefined), }; const mockBrowser = { newContext: vi.fn().mockResolvedValue(mockContext), close: vi.fn().mockResolvedValue(undefined), }; // Setup the browser launch mock with a type assertion vi.mocked(playwright.chromium.launch).mockResolvedValue( mockBrowser as unknown as playwright.Browser, ); return { mockPage, mockContext, mockBrowser }; }; describe("Documentation Class", () => { let mockPage: any; let mockContext: any; let mockBrowser: any; let mockedAxios: Mocked<typeof axios>; // Use Mocked type let metadata: ServiceMetadata = { orgId: "" }; beforeEach(() => { // Reset all mocks vi.clearAllMocks(); mockedAxios = axios as Mocked<typeof axios>; // Ensure axios is typed correctly mockedAxios.get.mockReset(); // Reset 
mocks specifically mockedAxios.post.mockReset(); // Set LLM_PROVIDER env var to prevent errors when accessing LanguageModel.contextLength process.env.LLM_PROVIDER = "ANTHROPIC"; // Create standard mocks for Playwright ({ mockPage, mockContext, mockBrowser } = createPlaywrightMocks()); }); afterEach(async () => { // Use the static closeBrowser from the strategy class await PlaywrightFetchingStrategy.closeBrowser(); }); describe("fetchAndProcess", () => { it("should fetch and convert HTML documentation via Playwright", async () => { const htmlDoc = ` <html><body><h1>API Docs</h1><p>Details here.</p></body></html> `; mockPage.evaluate.mockResolvedValue({ html: htmlDoc, textContent: "API Docs Details here.", links: {}, }); // Mock sitemap requests to fail (404) mockedAxios.get.mockRejectedValue(new Error("404")); const docUrl = "https://api.example.com/docs"; const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockBrowser.newContext).toHaveBeenCalledTimes(1); expect(mockContext.newPage).toHaveBeenCalledTimes(1); expect(mockPage.goto).toHaveBeenCalledWith(docUrl, { timeout: server_defaults.DOCUMENTATION.TIMEOUTS.PLAYWRIGHT, }); expect(mockPage.waitForLoadState).toHaveBeenCalledWith("domcontentloaded", { timeout: server_defaults.DOCUMENTATION.TIMEOUTS.PLAYWRIGHT, }); expect(mockPage.waitForTimeout).toHaveBeenCalledWith(1000); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); // Single evaluate for DOM manipulation and link extraction expect(result).toContain("# API Docs"); expect(result).toContain("Details here."); // Sitemap fetches are attempted expect(mockedAxios.get).toHaveBeenCalled(); expect(mockedAxios.post).not.toHaveBeenCalled(); }); it("should return raw page content if not HTML, GraphQL, or OpenAPI", async () => { const plainDoc = "Plain text documentation content."; 
mockPage.evaluate.mockResolvedValue({ html: plainDoc, textContent: plainDoc, links: {} }); // Mock sitemap requests to fail mockedAxios.get.mockRejectedValue(new Error("404")); const doc = new DocumentationFetcher( { documentationUrl: "https://api.example.com/raw", urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); expect(result).toBe(plainDoc); expect(mockedAxios.get).toHaveBeenCalled(); // Sitemap attempts expect(mockedAxios.post).not.toHaveBeenCalled(); }); it("should attempt GraphQL introspection for likely GraphQL URLs", async () => { const mockSchema = { __schema: { types: [{ name: "Query" }] } }; mockedAxios.post.mockResolvedValueOnce({ data: { data: mockSchema } }); const docUrl = "https://api.example.com/graphql"; const headers = { Auth: "key" }; const params = { p: "1" }; const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: "https://api.example.com", urlPath: "/graphql", headers, queryParams: params, }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(mockedAxios.post).toHaveBeenCalledWith( docUrl, expect.objectContaining({ operationName: "IntrospectionQuery" }), { headers, params, timeout: server_defaults.DOCUMENTATION.TIMEOUTS.AXIOS }, ); expect(result).toBe(JSON.stringify(mockSchema.__schema)); expect(playwright.chromium.launch).not.toHaveBeenCalled(); }); it("should fall back to Playwright fetch if GraphQL introspection fails", async () => { const htmlDoc = `<html><body>GraphQL Maybe?</body></html>`; mockedAxios.post.mockRejectedValueOnce(new Error("GraphQL Network Error")); // Simulate network failure mockPage.evaluate.mockResolvedValue({ html: htmlDoc, textContent: "GraphQL Maybe?", links: {}, }); const docUrl = "https://api.example.com/graphql"; // Looks like GraphQL const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: 
"https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); // Check GraphQL was attempted expect(mockedAxios.post).toHaveBeenCalledWith(docUrl, expect.anything(), expect.anything()); // Check Playwright was used as fallback expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); // Check result is from Playwright fetch (processed HTML) expect(result).toContain("GraphQL Maybe?"); }); it("should fall back to Playwright fetch if GraphQL returns errors", async () => { const htmlDoc = `<html><body>GraphQL Maybe?</body></html>`; mockedAxios.post.mockResolvedValueOnce({ data: { errors: [{ message: "Bad Query" }] } }); // Simulate GQL error response mockPage.evaluate.mockResolvedValue({ html: htmlDoc, textContent: "GraphQL Maybe?", links: {}, }); const docUrl = "https://api.example.com/graphql"; // Looks like GraphQL const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); // Check GraphQL was attempted expect(mockedAxios.post).toHaveBeenCalledWith(docUrl, expect.anything(), expect.anything()); // Check Playwright was used as fallback expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); // Check result is from Playwright fetch (processed HTML) expect(result).toContain("GraphQL Maybe?"); }); it("should extract and fetch relative OpenAPI URL found in HTML", async () => { const openApiJson = { openapi: "3.0.1", info: { title: "My API" } }; const baseUrl = "https://base.example.com/docs"; // Mock Axios to return OpenAPI spec directly (simulating Axios strategy success) mockedAxios.get.mockResolvedValue({ data: openApiJson }); const doc = new DocumentationFetcher( { documentationUrl: baseUrl, urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); // Verify result contains the OpenAPI 
spec (formatted with indentation) expect(result).toContain('"openapi": "3.0.1"'); expect(result).toContain('"title": "My API"'); }); it("should handle page content being the OpenAPI spec directly (JSON)", async () => { const openApiJsonString = JSON.stringify({ swagger: "2.0", info: { title: "Direct JSON" } }); mockPage.evaluate.mockResolvedValue({ html: openApiJsonString, textContent: openApiJsonString, links: {}, }); // Mock sitemap requests to fail mockedAxios.get.mockRejectedValue(new Error("404")); const docUrl = "https://api.example.com/openapi.json"; const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); expect(result).toContain(openApiJsonString); }); it("should handle page content being the OpenAPI spec directly (YAML)", async () => { const openApiYaml = `openapi: 3.1.0\ninfo:\n title: Direct YAML`; mockPage.evaluate.mockResolvedValue({ html: openApiYaml, textContent: openApiYaml, links: {}, }); // Mock sitemap requests to fail mockedAxios.get.mockRejectedValue(new Error("404")); const docUrl = "https://api.example.com/openapi.yaml"; const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); expect(result).toBe(openApiYaml); }); it("should fall back to HTML->Markdown if OpenAPI extraction/fetch fails", async () => { const swaggerHtml = `<html><script id="swagger-settings">{ "url": "/missing.json" }</script><body>Content</body></html>`; mockPage.evaluate.mockResolvedValue({ html: swaggerHtml, textContent: "Content", links: {} }); // All requests fail mockedAxios.get.mockRejectedValue(new Error("404 Not Found")); const 
headers = { Auth: "key" }; const docUrl = "https://api.example.com/docs"; const doc = new DocumentationFetcher( { documentationUrl: docUrl, urlHost: "https://api.example.com", headers }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); // Result should be the Markdown conversion of the original HTML expect(result).toContain("Content"); expect(result).not.toContain("missing.json"); }); it("should handle Playwright fetch errors gracefully", async () => { vi.mocked(playwright.chromium.launch).mockRejectedValueOnce( new Error("Browser launch failed"), ); const doc = new DocumentationFetcher( { documentationUrl: "https://api.example.com/docs", urlHost: "https://api.example.com" }, {}, metadata, ); const result = await doc.fetchAndProcess(); expect(result).toBe(""); // Should return empty string on complete failure expect(mockedAxios.get).toHaveBeenCalled(); // should call axios instead }); it("should cache the result and return processed result on subsequent calls", async () => { // Test with a simple text response via Axios const plainDoc = "Plain text data"; // Mock Axios to return plain text (first strategy to succeed) mockedAxios.get.mockResolvedValue({ data: plainDoc }); const httpDoc = new DocumentationFetcher( { documentationUrl: "http://example.com/docs.txt" }, {}, metadata, ); const resHttp1 = await httpDoc.fetchAndProcess(); expect(resHttp1).toBe(plainDoc); // Reset the call count for the second call to test caching const initialCallCount = mockedAxios.get.mock.calls.length; const resHttp2 = await httpDoc.fetchAndProcess(); expect(resHttp2).toBe(plainDoc); expect(mockedAxios.get.mock.calls.length).toBe(initialCallCount); // No additional calls (cached) }); }); describe("extractRelevantSections", () => { const documentationSearch = new DocumentationSearch({ orgId: "test" }); it("should return empty string for empty documentation", () => { const result = 
documentationSearch.extractRelevantSections("", "some instruction"); expect(result).toBe(""); }); it("should return whole doc if no valid search terms but doc is small", () => { const doc = "Some documentation content here"; const result = documentationSearch.extractRelevantSections(doc, "a b c"); // All terms too short expect(result).toBe(doc); // Returns whole doc since it's smaller than section size }); it("should return whole doc if smaller than section size", () => { const doc = "Short documentation"; const result = documentationSearch.extractRelevantSections(doc, "documentation", 5, 500); expect(result).toBe(doc); }); it("should return empty string if no sections match search terms", () => { const doc = "A".repeat(1000); const result = documentationSearch.extractRelevantSections(doc, "nonexistent term", 5, 200); expect(result).toBe(""); }); it("should extract sections matching search terms", () => { const doc = "prefix ".repeat(50) + "important api endpoint here " + "suffix ".repeat(50); const result = documentationSearch.extractRelevantSections(doc, "api endpoint", 3, 200); expect(result).toContain("api"); expect(result).toContain("endpoint"); expect(result.length).toBeLessThanOrEqual(3 * 200); }); it("should respect maxSections parameter", () => { const section1 = "first section with keyword api " + "x".repeat(170); const section2 = "second section with keyword api " + "y".repeat(170); const section3 = "third section with keyword api " + "z".repeat(170); const doc = section1 + section2 + section3; const result = documentationSearch.extractRelevantSections(doc, "api", 2, 200); const sections = result.split("\n\n"); expect(sections.length).toBeLessThanOrEqual(2); expect(result.length).toBeLessThanOrEqual(2 * 200); }); it("should respect sectionSize parameter", () => { const doc = "test api ".repeat(100); // ~900 chars const result = documentationSearch.extractRelevantSections(doc, "api test", 3, 250); // Should create sections of 250 chars each 
expect(result.length).toBeLessThanOrEqual(3 * 250); expect(result).toContain("api"); expect(result).toContain("test"); }); it("should handle multiple search terms and score accordingly", () => { const section1 = "authentication and authorization required " + "x".repeat(160); const section2 = "just some random content here " + "y".repeat(170); const section3 = "authentication mentioned once " + "z".repeat(170); const doc = section1 + section2 + section3; const result = documentationSearch.extractRelevantSections( doc, "authentication authorization", 2, 200, ); // Section 1 should score highest (has both terms) // Section 3 should score second (has one term) // Section 2 should not be included (has no terms) expect(result).toContain("authentication"); expect(result).toContain("authorization"); expect(result).not.toContain("random content"); }); it("should maintain section order after scoring", () => { const section1 = "first match for keyword " + "a".repeat(176); const section2 = "no matches here at all " + "b".repeat(177); const section3 = "third match for keyword " + "c".repeat(176); const doc = section1 + section2 + section3; const result = documentationSearch.extractRelevantSections(doc, "keyword", 2, 200); // Both matching sections should be included in their original order const firstIndex = result.indexOf("first"); const thirdIndex = result.indexOf("third"); expect(firstIndex).toBeLessThan(thirdIndex); }); it("should validate and adjust input parameters", () => { const doc = "test content ".repeat(100); // Test with invalid maxSections (too high) const result1 = documentationSearch.extractRelevantSections(doc, "test", 150, 200); expect(result1).toContain("test"); // Test with invalid sectionSize (too small) const result2 = documentationSearch.extractRelevantSections(doc, "test", 5, 50); expect(result2).toContain("test"); // Test with 0 or negative values const result3 = documentationSearch.extractRelevantSections(doc, "test", 0, -100); 
expect(result3).toContain("test"); }); it("should filter search terms by minimum length", () => { const doc = "authentication system for api access"; // "for" should be filtered out (too short) const result = documentationSearch.extractRelevantSections(doc, "for api", 1, 200); expect(result).toContain("api"); // Returns whole doc if all terms are too short and doc is small const result2 = documentationSearch.extractRelevantSections(doc, "a or by", 1, 200); expect(result2).toBe(doc); // Whole doc since it's smaller than section size }); describe("OpenAPI schema integration", () => { it("should extract security information when security keywords are present", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, components: { securitySchemes: { bearerAuth: { type: "http", scheme: "bearer", bearerFormat: "JWT", }, apiKey: { type: "apiKey", in: "header", name: "X-API-Key", }, }, }, security: [{ bearerAuth: [] }], paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", responses: { "200": { description: "Success" } }, }, }, }, }); const doc = "General documentation content about the API usage."; const result = documentationSearch.extractRelevantSections( doc, "authentication bearer token", 5, 2000, openApiSpec, ); expect(result).toContain("=== SECURITY ==="); expect(result).toContain("bearerAuth"); expect(result).toContain("bearer"); expect(result).toContain("JWT"); expect(result).toContain("apiKey"); expect(result).toContain("X-API-Key"); }); it("should not extract security info when no security keywords in query", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, components: { securitySchemes: { bearerAuth: { type: "http", scheme: "bearer" }, }, }, paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", tags: ["users"], responses: { "200": { description: "Success" } }, }, }, }, }); const doc = "General documentation 
content."; const result = documentationSearch.extractRelevantSections( doc, "users list", 5, 2000, openApiSpec, ); expect(result).not.toContain("=== SECURITY ==="); expect(result).not.toContain("bearerAuth"); }); it("should extract and rank relevant OpenAPI operations based on search terms", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/users": { get: { summary: "List all users", operationId: "listUsers", tags: ["users"], description: "Get a list of all users in the system", responses: { "200": { description: "Success" } }, }, post: { summary: "Create a user", operationId: "createUser", tags: ["users"], description: "Create a new user account", responses: { "201": { description: "Created" } }, }, }, "/products": { get: { summary: "List products", operationId: "listProducts", tags: ["products"], description: "Get all products from catalog", responses: { "200": { description: "Success" } }, }, }, "/users/{id}": { get: { summary: "Get user by ID", operationId: "getUserById", tags: ["users"], description: "Fetch a single user by their unique identifier", parameters: [ { name: "id", in: "path", required: true, schema: { type: "string" } }, ], responses: { "200": { description: "Success" } }, }, }, }, }); const doc = "Some general documentation text."; const result = documentationSearch.extractRelevantSections( doc, "users account identifier", 5, 2000, openApiSpec, ); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("[GET /users]"); expect(result).toContain("listUsers"); // Should not include products endpoint since search terms only match user-related operations expect(result).not.toContain("products"); expect(result).not.toContain("listProducts"); }); it("should match operations by path, method, operationId, and description", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/auth/login": { 
post: { summary: "User login", operationId: "loginUser", tags: ["authentication"], description: "Authenticate user with credentials and return token", responses: { "200": { description: "Success" } }, }, }, "/auth/logout": { post: { summary: "User logout", operationId: "logoutUser", tags: ["authentication"], description: "Invalidate user session token", responses: { "200": { description: "Success" } }, }, }, "/users/profile": { get: { summary: "Get profile", operationId: "getProfile", tags: ["users"], description: "Get current user profile", responses: { "200": { description: "Success" } }, }, }, }, }); // Search by path component const result1 = documentationSearch.extractRelevantSections( "", "auth login", 5, 2000, openApiSpec, ); expect(result1).toContain("[POST /auth/login]"); expect(result1).toContain("loginUser"); expect(result1).toContain("Authenticate user with credentials"); // Search by operationId const result2 = documentationSearch.extractRelevantSections( "", "logoutUser", 5, 2000, openApiSpec, ); expect(result2).toContain("[POST /auth/logout]"); expect(result2).toContain("logoutUser"); // Search by tag const result3 = documentationSearch.extractRelevantSections( "", "authentication", 5, 2000, openApiSpec, ); expect(result3).toContain("authentication"); expect(result3).toContain("login"); }); it("should limit number of returned operations based on maxSections", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/users": { get: { summary: "List users", operationId: "listUsers", description: "Get all users in the system", responses: { "200": { description: "Success" } }, }, post: { summary: "Create user", operationId: "createUser", description: "Create a new user account", responses: { "201": { description: "Created" } }, }, }, "/users/{id}": { get: { summary: "Get user", operationId: "getUser", description: "Retrieve user by ID", responses: { "200": { description: "Success" } }, }, 
put: { summary: "Update user", operationId: "updateUser", description: "Update user information", responses: { "200": { description: "Success" } }, }, delete: { summary: "Delete user", operationId: "deleteUser", description: "Remove user from system", responses: { "204": { description: "Deleted" } }, }, }, }, }); // With maxSections=2, should only get top 2 matching operations const result = documentationSearch.extractRelevantSections( "", "users", 2, 2000, openApiSpec, ); expect(result).toContain("=== OPENAPI OPERATIONS ==="); // Count operation delimiters to verify we got limited results const operationCount = (result.match(/\[(?:GET|POST|PUT|DELETE) /g) || []).length; expect(operationCount).toBeLessThanOrEqual(2); }); it("should handle OpenAPI spec with parameters in operations", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/search": { get: { summary: "Search items", operationId: "searchItems", description: "Search for items using query parameters", parameters: [ { name: "query", in: "query", required: true, schema: { type: "string" } }, { name: "limit", in: "query", schema: { type: "integer" } }, { name: "offset", in: "query", schema: { type: "integer" } }, ], responses: { "200": { description: "Success" } }, }, }, }, }); // Search should match parameter names const result = documentationSearch.extractRelevantSections( "", "query limit search", 5, 2000, openApiSpec, ); expect(result).toContain("[GET /search]"); expect(result).toContain("searchItems"); expect(result).toContain("query"); expect(result).toContain("limit"); }); it("should combine documentation sections with OpenAPI operations", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", description: "List all users", responses: { "200": { description: "Success" } }, }, }, }, }); const doc = "This 
documentation explains how to use the users endpoint. " + "The users API allows you to manage user accounts. " + "You can list, create, update, and delete users."; const result = documentationSearch.extractRelevantSections( doc, "users", 5, 2000, openApiSpec, ); // Should contain both documentation sections and OpenAPI operations // Note: === DOCUMENTATION === header is only added when security info is present expect(result).toContain("users endpoint"); expect(result).toContain("manage user accounts"); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("[GET /users]"); expect(result).toContain("getUsers"); }); it("should add DOCUMENTATION header when security info is also present", () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, components: { securitySchemes: { bearerAuth: { type: "http", scheme: "bearer", }, }, }, security: [{ bearerAuth: [] }], paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", description: "List all users", responses: { "200": { description: "Success" } }, }, }, }, }); const doc = "This documentation explains the API. 
Users endpoint allows managing accounts."; const result = documentationSearch.extractRelevantSections( doc, "users authentication", 5, 2000, openApiSpec, ); // With security keywords, should have all three sections with headers expect(result).toContain("=== SECURITY ==="); expect(result).toContain("bearerAuth"); expect(result).toContain("=== DOCUMENTATION ==="); expect(result).toContain("Users endpoint"); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("[GET /users]"); }); it("should handle Google Discovery schema format", () => { const googleDiscoverySpec = JSON.stringify({ kind: "discovery#restDescription", name: "testapi", version: "v1", resources: { users: { methods: { list: { id: "testapi.users.list", path: "users", httpMethod: "GET", description: "Lists all users in the system", parameters: { maxResults: { type: "integer", location: "query", }, }, }, insert: { id: "testapi.users.insert", path: "users", httpMethod: "POST", description: "Creates a new user", }, }, }, }, }); const result = documentationSearch.extractRelevantSections( "", "users list", 5, 2000, googleDiscoverySpec, ); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("testapi.users.list"); expect(result).toContain("Lists all users"); expect(result).toContain("GET"); }); }); }); describe("Sitemap and URL Ranking", () => { let strategy: PlaywrightFetchingStrategy; beforeEach(() => { strategy = new PlaywrightFetchingStrategy(); vi.clearAllMocks(); mockedAxios = axios as Mocked<typeof axios>; mockedAxios.get.mockReset(); mockedAxios.post.mockReset(); ({ mockPage, mockContext, mockBrowser } = createPlaywrightMocks()); }); describe("rankItems", () => { it("should filter out URLs with excluded keywords", () => { const urls = [ "https://api.com/docs/getting-started", "https://api.com/pricing", "https://api.com/docs/authentication", "https://api.com/signup", "https://api.com/blog/updates", ]; const keywords = ["docs", "authentication"]; const 
ranked = strategy.rankItems(urls, keywords); // Should exclude pricing, signup, and blog completely expect(ranked).toHaveLength(2); expect(ranked[0]).toBe("https://api.com/docs/authentication"); expect(ranked[1]).toBe("https://api.com/docs/getting-started"); expect(ranked).not.toContain("https://api.com/pricing"); expect(ranked).not.toContain("https://api.com/signup"); expect(ranked).not.toContain("https://api.com/blog/updates"); }); it("should rank URLs by keyword match count divided by URL length", () => { const urls = [ "https://example.com/v1/users/read/fast", // No 'api' in domain, 1 match "https://api.com/documentation/api/v1/users/endpoints", // Long, 2 matches "https://api.com/api/users", // Short, 2 matches ]; const keywords = ["api", "users"]; const ranked = strategy.rankItems(urls, keywords) as string[]; // api/users should rank highest (2 matches, shortest URL with api) expect(ranked[0]).toBe("https://api.com/api/users"); // Long URL with 2 matches should be second expect(ranked[1]).toBe("https://api.com/documentation/api/v1/users/endpoints"); // URL with only 1 match should be last expect(ranked[2]).toBe("https://example.com/v1/users/read/fast"); }); it("should handle link objects with text", () => { const links = [ { linkText: "API Reference", href: "https://api.com/reference" }, { linkText: "Getting Started", href: "https://api.com/start" }, { linkText: "Pricing Plans", href: "https://api.com/pricing" }, ]; const keywords = ["api", "reference"]; const ranked = strategy.rankItems(links, keywords); expect(ranked).toHaveLength(2); // Pricing excluded completely expect(ranked[0]).toEqual({ linkText: "API Reference", href: "https://api.com/reference" }); expect(ranked[1]).toEqual({ linkText: "Getting Started", href: "https://api.com/start" }); expect(ranked).not.toContainEqual({ linkText: "Pricing Plans", href: "https://api.com/pricing", }); }); it("should filter already fetched links when provided", () => { const urls = [ "https://api.com/docs/intro", 
"https://api.com/docs/api", "https://api.com/docs/guide", ]; const fetchedLinks = new Set(["https://api.com/docs/intro"]); const keywords = ["docs"]; const ranked = strategy.rankItems(urls, keywords, fetchedLinks) as string[]; expect(ranked).toHaveLength(2); expect(ranked).not.toContain("https://api.com/docs/intro"); }); });

    /**
     * Exercises DocumentationFetcher.fetchAndProcess() against mocked axios
     * (sitemap downloads) and a mocked Playwright page (page visits).
     *
     * The three helpers below only bundle mock wiring that every test in this
     * group repeats; they read `mockedAxios`, `mockPage` and `metadata` from the
     * enclosing suite at call time, i.e. after `beforeEach` has populated them.
     */
    describe("Sitemap fetching", () => {
      /**
       * Answers every axios GET whose URL contains "sitemap.xml" with `xml`;
       * any other GET rejects with Error("404").
       */
      const serveSitemap = (xml: string): void => {
        mockedAxios.get.mockImplementation((url: string) =>
          url.includes("sitemap.xml")
            ? Promise.resolve({ data: xml })
            : Promise.reject(new Error("404")),
        );
      };

      /** Builds the object shape that the mocked `page.evaluate` resolves with. */
      const pageSnapshot = (text: string, links: Record<string, string> = {}) => ({
        html: `<html><body>${text}</body></html>`,
        textContent: text,
        links,
      });

      /** Constructs a fetcher for the given URL/keywords and runs it to completion. */
      const crawl = (documentationUrl: string, keywords: string[]) =>
        new DocumentationFetcher({ documentationUrl, keywords }, {}, metadata).fetchAndProcess();

      it("should fetch and parse XML sitemap", async () => {
        serveSitemap(
          `<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <url><loc>https://api.com/docs/intro</loc></url> <url><loc>https://api.com/docs/auth</loc></url> <url><loc>https://api.com/pricing</loc></url> <url><loc>https://api.com/docs/api</loc></url> </urlset>`,
        );
        mockPage.evaluate.mockResolvedValue(pageSnapshot("Content"));

        const output = await crawl("https://api.com/docs", ["api", "auth"]);

        // A sitemap.xml location must have been requested through axios.
        expect(mockedAxios.get).toHaveBeenCalledWith(
          expect.stringContaining("sitemap.xml"),
          expect.any(Object),
        );
        // At least one page was visited and its text reached the output.
        // NOTE(review): exclusion of the /pricing entry is not asserted here.
        expect(mockPage.goto).toHaveBeenCalled();
        expect(output).toContain("Content");
      });

      it("should handle sitemap index with nested sitemaps", async () => {
        const sitemapIndex = `<?xml version="1.0" encoding="UTF-8"?> <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <sitemap><loc>https://api.com/docs/sitemap.xml</loc></sitemap> <sitemap><loc>https://api.com/blog/sitemap.xml</loc></sitemap> </sitemapindex>`;
        const docsSitemap = `<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <url><loc>https://api.com/docs/intro</loc></url> <url><loc>https://api.com/docs/api</loc></url> </urlset>`;

        // The index check runs first, then the docs sitemap; the blog sitemap
        // (and everything else) falls through to a 404.
        mockedAxios.get.mockImplementation((url: string) => {
          const wantsIndex =
            url.includes("sitemap_index.xml") || url === "https://api.com/sitemap.xml";
          if (wantsIndex) {
            return Promise.resolve({ data: sitemapIndex });
          }
          return url.includes("docs/sitemap.xml")
            ? Promise.resolve({ data: docsSitemap })
            : Promise.reject(new Error("404"));
        });
        mockPage.evaluate.mockResolvedValue(pageSnapshot("Docs"));

        const output = await crawl("https://api.com/docs", ["docs"]);

        // Only checks that some sitemap URL was requested; which nested
        // sitemaps were followed is not pinned down by this assertion.
        expect(mockedAxios.get).toHaveBeenCalledWith(
          expect.stringContaining("sitemap"),
          expect.any(Object),
        );
        expect(output).toContain("Docs");
      });

      it("should fall back to legacy crawling if no sitemap found", async () => {
        // Every axios GET (and therefore every sitemap probe) fails.
        mockedAxios.get.mockRejectedValue(new Error("404"));

        // Only the first evaluate() call yields a page, and that page exposes links.
        mockPage.evaluate.mockResolvedValueOnce(
          pageSnapshot("Main Page", {
            "api reference https docs api": "https://api.com/docs/api",
            "getting started https docs start": "https://api.com/docs/start",
          }),
        );

        const output = await crawl("https://api.com/docs", ["api"]);

        // With no sitemap available the page itself must still be visited.
        expect(mockPage.goto).toHaveBeenCalled();
        expect(output).toContain("Main Page");
      });

      it("should respect MAX_FETCHED_LINKS limit", async () => {
        // Sitemap advertising 100 pages: page0 … page99.
        const entries: string[] = [];
        for (let i = 0; i < 100; i++) {
          entries.push(`<url><loc>https://api.com/docs/page${i}</loc></url>`);
        }
        serveSitemap(
          `<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> ${entries.join("")} </urlset>`,
        );
        mockPage.evaluate.mockResolvedValue(pageSnapshot("Page"));

        await crawl("https://api.com/docs", ["docs"]);

        // The number of navigations must equal the configured cap exactly.
        expect(mockPage.goto).toHaveBeenCalledTimes(
          server_defaults.DOCUMENTATION.MAX_FETCHED_LINKS,
        );
      });

      it("should filter sitemap URLs by path relevance", async () => {
        serveSitemap(
          `<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <url><loc>https://api.com/docs/api/intro</loc></url> <url><loc>https://api.com/company/about</loc></url> <url><loc>https://api.com/docs/api/auth</loc></url> <url><loc>https://api.com/marketing/landing</loc></url> </urlset>`,
        );
        mockPage.evaluate.mockResolvedValue(pageSnapshot("Content"));

        // Keywords chosen to match the two /docs/api/* entries.
        await crawl("https://api.com/docs/api", ["intro", "auth"]);

        // First argument of every goto() call is the visited URL.
        const visited = mockPage.goto.mock.calls.map((args) => args[0]);
        expect(visited.length).toBeGreaterThan(0);

        // At least one relevant URL was visited.
        // NOTE(review): absence of the /company and /marketing URLs is not asserted.
        const sawRelevant = visited.some(
          (target) => target.includes("intro") || target.includes("auth"),
        );
        expect(sawRelevant).toBe(true);
      });

      it("should deduplicate similar page content based on similarity threshold", async () => {
        serveSitemap(
          `<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <url><loc>https://api.com/docs/page1</loc></url> <url><loc>https://api.com/docs/page2</loc></url> <url><loc>https://api.com/docs/page3</loc></url> <url><loc>https://api.com/docs/page4</loc></url> </urlset>`,
        );

        // Two byte-identical "authentication" texts plus one unrelated text.
        const firstAuthText =
          "Authentication API documentation with bearer token support and OAuth flows for secure access " +
          "x".repeat(500);
        const repeatedAuthText =
          "Authentication API documentation with bearer token support and OAuth flows for secure access " +
          "x".repeat(500);
        const webhooksText =
          "Completely different content about webhooks and event subscriptions for real-time updates " +
          "y".repeat(500);

        // evaluate() call #1 → auth, #2 → duplicate, #3 → webhooks, #4+ → duplicate.
        const scriptedTexts = [firstAuthText, repeatedAuthText, webhooksText];
        let evaluateCalls = 0;
        mockPage.evaluate.mockImplementation(() => {
          const text = scriptedTexts[evaluateCalls] ?? repeatedAuthText;
          evaluateCalls += 1;
          return Promise.resolve(pageSnapshot(text));
        });

        const output = await crawl("https://api.com/docs", ["api"]);

        // More than one page must actually have been evaluated.
        expect(mockPage.goto).toHaveBeenCalled();
        expect(evaluateCalls).toBeGreaterThan(1);

        // Both distinct texts survive…
        expect(output).toContain("Authentication API documentation");
        expect(output).toContain("webhooks and event subscriptions");

        // …but the duplicated text is present exactly once.
        const hits =
          output.match(/Authentication API documentation with bearer token support/g) ?? [];
        expect(hits.length).toBe(1);
      });
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/superglue-ai/superglue'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

documentation.test.ts•46.3 KiB