Skip to main content
Glama
documentation.test.ts (53.6 kB)
import playwright from '@playwright/test';
import { Metadata } from '@superglue/shared';
import axios from 'axios';
import { afterEach, beforeEach, describe, expect, it, Mocked, vi } from 'vitest';
import { server_defaults } from '../default.js';
import { DocumentationFetcher } from './documentation-fetching.js';
import { PlaywrightFetchingStrategy } from './strategies/index.js';
import { DocumentationSearch } from './documentation-search.js';

// Mock playwright and axios.
// The '@playwright/test' factory spreads the real module and then overrides
// only its default export, so that `playwright.chromium.launch` is a bare
// vi.fn() that each test can program.
vi.mock('@playwright/test', async (importOriginal) => {
  const original = await importOriginal() as any;
  return {
    ...original, // Preserve other exports if any
    default: {
      chromium: {
        launch: vi.fn(),
      },
    },
  };
});
// No factory is given for axios; it is used through `Mocked<typeof axios>`
// in the tests below.
vi.mock('axios');

// Helper to create standard Playwright mocks.
//
// Builds three plain objects made of vi.fn() stubs and chains them together:
//   - mockBrowser.newContext() resolves to mockContext
//   - mockContext.newPage() resolves to mockPage
//   - every other stub resolves to undefined, except mockPage.content(),
//     which resolves to an empty string
// It also programs the mocked `playwright.chromium.launch` to resolve to
// mockBrowser.
//
// Returns { mockPage, mockContext, mockBrowser } so tests can override
// individual stubs and assert on calls.
const createPlaywrightMocks = () => {
  const mockPage = {
    goto: vi.fn().mockResolvedValue(undefined),
    waitForLoadState: vi.fn().mockResolvedValue(undefined),
    waitForTimeout: vi.fn().mockResolvedValue(undefined),
    addInitScript: vi.fn().mockResolvedValue(undefined),
    content: vi.fn().mockResolvedValue(''),
    evaluate: vi.fn().mockResolvedValue(undefined),
    close: vi.fn().mockResolvedValue(undefined),
  };
  const mockContext = {
    newPage: vi.fn().mockResolvedValue(mockPage),
    close: vi.fn().mockResolvedValue(undefined),
  };
  const mockBrowser = {
    newContext: vi.fn().mockResolvedValue(mockContext),
    close: vi.fn().mockResolvedValue(undefined),
  };

  // Setup the browser launch mock with a type assertion: mockBrowser only
  // has newContext and close, hence the cast through `unknown`.
  vi.mocked(playwright.chromium.launch).mockResolvedValue(mockBrowser as unknown as playwright.Browser);

  return { mockPage, mockContext, mockBrowser };
};

describe('Documentation Class', () => {
  // Playwright mocks; reassigned from createPlaywrightMocks() in beforeEach.
  let mockPage: any;
  let mockContext: any;
  let mockBrowser: any;
  let mockedAxios: Mocked<typeof axios>; // Use Mocked type
  // Metadata handed to the DocumentationFetcher instances built in the tests.
  let metadata: Metadata = { orgId: '' };

  beforeEach(() => {
    // Reset all mocks
    vi.clearAllMocks();
    mockedAxios = axios as Mocked<typeof axios>; // Ensure axios is typed correctly
    mockedAxios.get.mockReset(); // Reset mocks specifically
mockedAxios.post.mockReset(); // Set LLM_PROVIDER env var to prevent errors when accessing LanguageModel.contextLength process.env.LLM_PROVIDER = 'ANTHROPIC'; // Create standard mocks for Playwright ({ mockPage, mockContext, mockBrowser } = createPlaywrightMocks()); }); afterEach(async () => { // Use the static closeBrowser from the strategy class await PlaywrightFetchingStrategy.closeBrowser(); }); describe('fetchAndProcess', () => { it('should fetch and convert HTML documentation via Playwright', async () => { const htmlDoc = ` <html><body><h1>API Docs</h1><p>Details here.</p></body></html> `; mockPage.evaluate.mockResolvedValue({ html: htmlDoc, textContent: 'API Docs Details here.', links: {} }); // Mock sitemap requests to fail (404) mockedAxios.get.mockRejectedValue(new Error('404')); const docUrl = 'https://api.example.com/docs'; const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockBrowser.newContext).toHaveBeenCalledTimes(1); expect(mockContext.newPage).toHaveBeenCalledTimes(1); expect(mockPage.goto).toHaveBeenCalledWith(docUrl, { timeout: server_defaults.DOCUMENTATION.TIMEOUTS.PLAYWRIGHT }); expect(mockPage.waitForLoadState).toHaveBeenCalledWith('domcontentloaded', { timeout: server_defaults.DOCUMENTATION.TIMEOUTS.PLAYWRIGHT }); expect(mockPage.waitForTimeout).toHaveBeenCalledWith(1000); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); // Single evaluate for DOM manipulation and link extraction expect(result).toContain('# API Docs'); expect(result).toContain('Details here.'); // Sitemap fetches are attempted expect(mockedAxios.get).toHaveBeenCalled(); expect(mockedAxios.post).not.toHaveBeenCalled(); }); it('should return raw page content if not HTML, GraphQL, or OpenAPI', async () => { const plainDoc = 'Plain text documentation content.'; mockPage.evaluate.mockResolvedValue({ 
html: plainDoc, textContent: plainDoc, links: {} }) // Mock sitemap requests to fail mockedAxios.get.mockRejectedValue(new Error('404')); const doc = new DocumentationFetcher({ documentationUrl: 'https://api.example.com/raw', urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); expect(result).toBe(plainDoc); expect(mockedAxios.get).toHaveBeenCalled(); // Sitemap attempts expect(mockedAxios.post).not.toHaveBeenCalled(); }); it('should attempt GraphQL introspection for likely GraphQL URLs', async () => { const mockSchema = { __schema: { types: [{ name: 'Query' }] } }; mockedAxios.post.mockResolvedValueOnce({ data: { data: mockSchema } }); const docUrl = 'https://api.example.com/graphql'; const headers = { 'Auth': 'key' }; const params = { 'p': '1' }; const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com', urlPath: '/graphql', headers, queryParams: params }, {}, metadata); const result = await doc.fetchAndProcess(); expect(mockedAxios.post).toHaveBeenCalledWith( docUrl, expect.objectContaining({ operationName: 'IntrospectionQuery' }), { headers, params, timeout: server_defaults.DOCUMENTATION.TIMEOUTS.AXIOS } ); expect(result).toBe(JSON.stringify(mockSchema.__schema)); expect(playwright.chromium.launch).not.toHaveBeenCalled(); }); it('should fall back to Playwright fetch if GraphQL introspection fails', async () => { const htmlDoc = `<html><body>GraphQL Maybe?</body></html>`; mockedAxios.post.mockRejectedValueOnce(new Error('GraphQL Network Error')); // Simulate network failure mockPage.evaluate.mockResolvedValue({ html: htmlDoc, textContent: 'GraphQL Maybe?', links: {} }) const docUrl = 'https://api.example.com/graphql'; // Looks like GraphQL const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com' }, {}, metadata); const 
result = await doc.fetchAndProcess(); // Check GraphQL was attempted expect(mockedAxios.post).toHaveBeenCalledWith(docUrl, expect.anything(), expect.anything()); // Check Playwright was used as fallback expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); // Check result is from Playwright fetch (processed HTML) expect(result).toContain('GraphQL Maybe?'); }); it('should fall back to Playwright fetch if GraphQL returns errors', async () => { const htmlDoc = `<html><body>GraphQL Maybe?</body></html>`; mockedAxios.post.mockResolvedValueOnce({ data: { errors: [{ message: 'Bad Query' }] } }); // Simulate GQL error response mockPage.evaluate.mockResolvedValue({ html: htmlDoc, textContent: 'GraphQL Maybe?', links: {} }) const docUrl = 'https://api.example.com/graphql'; // Looks like GraphQL const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); // Check GraphQL was attempted expect(mockedAxios.post).toHaveBeenCalledWith(docUrl, expect.anything(), expect.anything()); // Check Playwright was used as fallback expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); // Check result is from Playwright fetch (processed HTML) expect(result).toContain('GraphQL Maybe?'); }); it('should extract and fetch relative OpenAPI URL found in HTML', async () => { const openApiJson = { openapi: "3.0.1", info: { title: "My API" } }; const baseUrl = 'https://base.example.com/docs'; // Mock Axios to return OpenAPI spec directly (simulating Axios strategy success) mockedAxios.get.mockResolvedValue({ data: openApiJson }); const doc = new DocumentationFetcher({ documentationUrl: baseUrl, urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); // Verify result contains the OpenAPI spec (formatted with indentation) 
expect(result).toContain('"openapi": "3.0.1"'); expect(result).toContain('"title": "My API"'); }); it('should handle page content being the OpenAPI spec directly (JSON)', async () => { const openApiJsonString = JSON.stringify({ swagger: "2.0", info: { title: "Direct JSON" } }); mockPage.evaluate.mockResolvedValue({ html: openApiJsonString, textContent: openApiJsonString, links: {} }) // Mock sitemap requests to fail mockedAxios.get.mockRejectedValue(new Error('404')); const docUrl = 'https://api.example.com/openapi.json'; const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); expect(result).toContain(openApiJsonString); }); it('should handle page content being the OpenAPI spec directly (YAML)', async () => { const openApiYaml = `openapi: 3.1.0\ninfo:\n title: Direct YAML`; mockPage.evaluate.mockResolvedValue({ html: openApiYaml, textContent: openApiYaml, links: {} }) // Mock sitemap requests to fail mockedAxios.get.mockRejectedValue(new Error('404')); const docUrl = 'https://api.example.com/openapi.yaml'; const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); expect(result).toBe(openApiYaml); }); it('should fall back to HTML->Markdown if OpenAPI extraction/fetch fails', async () => { const swaggerHtml = `<html><script id="swagger-settings">{ "url": "/missing.json" }</script><body>Content</body></html>`; mockPage.evaluate.mockResolvedValue({ html: swaggerHtml, textContent: 'Content', links: {} }) // All requests fail mockedAxios.get.mockRejectedValue(new Error('404 Not Found')); const headers = { 'Auth': 'key' }; const docUrl = 
'https://api.example.com/docs'; const doc = new DocumentationFetcher({ documentationUrl: docUrl, urlHost: 'https://api.example.com', headers }, {}, metadata); const result = await doc.fetchAndProcess(); expect(playwright.chromium.launch).toHaveBeenCalledTimes(1); // Result should be the Markdown conversion of the original HTML expect(result).toContain('Content'); expect(result).not.toContain('missing.json'); }); it('should handle Playwright fetch errors gracefully', async () => { vi.mocked(playwright.chromium.launch).mockRejectedValueOnce(new Error('Browser launch failed')); const doc = new DocumentationFetcher({ documentationUrl: 'https://api.example.com/docs', urlHost: 'https://api.example.com' }, {}, metadata); const result = await doc.fetchAndProcess(); expect(result).toBe(''); // Should return empty string on complete failure expect(mockedAxios.get).toHaveBeenCalled(); // should call axios instead }); it('should cache the result and return processed result on subsequent calls', async () => { // Test with a simple text response via Axios const plainDoc = 'Plain text data'; // Mock Axios to return plain text (first strategy to succeed) mockedAxios.get.mockResolvedValue({ data: plainDoc }); const httpDoc = new DocumentationFetcher({ documentationUrl: 'http://example.com/docs.txt' }, {}, metadata); const resHttp1 = await httpDoc.fetchAndProcess(); expect(resHttp1).toBe(plainDoc); // Reset the call count for the second call to test caching const initialCallCount = mockedAxios.get.mock.calls.length; const resHttp2 = await httpDoc.fetchAndProcess(); expect(resHttp2).toBe(plainDoc); expect(mockedAxios.get.mock.calls.length).toBe(initialCallCount); // No additional calls (cached) }); }); describe('extractRelevantSections', () => { const documentationSearch = new DocumentationSearch({ orgId: 'test' }); it('should return empty string for empty documentation', () => { const result = documentationSearch.extractRelevantSections("", "some instruction"); 
expect(result).toBe(""); }); it('should return whole doc if no valid search terms but doc is small', () => { const doc = "Some documentation content here"; const result = documentationSearch.extractRelevantSections(doc, "a b c"); // All terms too short expect(result).toBe(doc); // Returns whole doc since it's smaller than section size }); it('should return whole doc if smaller than section size', () => { const doc = "Short documentation"; const result = documentationSearch.extractRelevantSections(doc, "documentation", 5, 500); expect(result).toBe(doc); }); it('should return empty string if no sections match search terms', () => { const doc = "A".repeat(1000); const result = documentationSearch.extractRelevantSections(doc, "nonexistent term", 5, 200); expect(result).toBe(""); }); it('should extract sections matching search terms', () => { const doc = "prefix ".repeat(50) + "important api endpoint here " + "suffix ".repeat(50); const result = documentationSearch.extractRelevantSections(doc, "api endpoint", 3, 200); expect(result).toContain("api"); expect(result).toContain("endpoint"); expect(result.length).toBeLessThanOrEqual(3 * 200); }); it('should respect maxSections parameter', () => { const section1 = "first section with keyword api " + "x".repeat(170); const section2 = "second section with keyword api " + "y".repeat(170); const section3 = "third section with keyword api " + "z".repeat(170); const doc = section1 + section2 + section3; const result = documentationSearch.extractRelevantSections(doc, "api", 2, 200); const sections = result.split('\n\n'); expect(sections.length).toBeLessThanOrEqual(2); expect(result.length).toBeLessThanOrEqual(2 * 200); }); it('should respect sectionSize parameter', () => { const doc = "test api ".repeat(100); // ~900 chars const result = documentationSearch.extractRelevantSections(doc, "api test", 3, 250); // Should create sections of 250 chars each expect(result.length).toBeLessThanOrEqual(3 * 250); 
expect(result).toContain("api"); expect(result).toContain("test"); }); it('should handle multiple search terms and score accordingly', () => { const section1 = "authentication and authorization required " + "x".repeat(160); const section2 = "just some random content here " + "y".repeat(170); const section3 = "authentication mentioned once " + "z".repeat(170); const doc = section1 + section2 + section3; const result = documentationSearch.extractRelevantSections(doc, "authentication authorization", 2, 200); // Section 1 should score highest (has both terms) // Section 3 should score second (has one term) // Section 2 should not be included (has no terms) expect(result).toContain("authentication"); expect(result).toContain("authorization"); expect(result).not.toContain("random content"); }); it('should maintain section order after scoring', () => { const section1 = "first match for keyword " + "a".repeat(176); const section2 = "no matches here at all " + "b".repeat(177); const section3 = "third match for keyword " + "c".repeat(176); const doc = section1 + section2 + section3; const result = documentationSearch.extractRelevantSections(doc, "keyword", 2, 200); // Both matching sections should be included in their original order const firstIndex = result.indexOf("first"); const thirdIndex = result.indexOf("third"); expect(firstIndex).toBeLessThan(thirdIndex); }); it('should validate and adjust input parameters', () => { const doc = "test content ".repeat(100); // Test with invalid maxSections (too high) const result1 = documentationSearch.extractRelevantSections(doc, "test", 150, 200); expect(result1).toContain("test"); // Test with invalid sectionSize (too small) const result2 = documentationSearch.extractRelevantSections(doc, "test", 5, 50); expect(result2).toContain("test"); // Test with 0 or negative values const result3 = documentationSearch.extractRelevantSections(doc, "test", 0, -100); expect(result3).toContain("test"); }); it('should filter search terms by 
minimum length', () => { const doc = "authentication system for api access"; // "for" should be filtered out (too short) const result = documentationSearch.extractRelevantSections(doc, "for api", 1, 200); expect(result).toContain("api"); // Returns whole doc if all terms are too short and doc is small const result2 = documentationSearch.extractRelevantSections(doc, "a or by", 1, 200); expect(result2).toBe(doc); // Whole doc since it's smaller than section size }); describe('OpenAPI schema integration', () => { it('should extract security information when security keywords are present', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, components: { securitySchemes: { bearerAuth: { type: "http", scheme: "bearer", bearerFormat: "JWT" }, apiKey: { type: "apiKey", in: "header", name: "X-API-Key" } } }, security: [ { bearerAuth: [] } ], paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", responses: { "200": { description: "Success" } } } } } }); const doc = "General documentation content about the API usage."; const result = documentationSearch.extractRelevantSections(doc, "authentication bearer token", 5, 2000, openApiSpec); expect(result).toContain("=== SECURITY ==="); expect(result).toContain("bearerAuth"); expect(result).toContain("bearer"); expect(result).toContain("JWT"); expect(result).toContain("apiKey"); expect(result).toContain("X-API-Key"); }); it('should not extract security info when no security keywords in query', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, components: { securitySchemes: { bearerAuth: { type: "http", scheme: "bearer" } } }, paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", tags: ["users"], responses: { "200": { description: "Success" } } } } } }); const doc = "General documentation content."; const result = documentationSearch.extractRelevantSections(doc, "users list", 
5, 2000, openApiSpec); expect(result).not.toContain("=== SECURITY ==="); expect(result).not.toContain("bearerAuth"); }); it('should extract and rank relevant OpenAPI operations based on search terms', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/users": { get: { summary: "List all users", operationId: "listUsers", tags: ["users"], description: "Get a list of all users in the system", responses: { "200": { description: "Success" } } }, post: { summary: "Create a user", operationId: "createUser", tags: ["users"], description: "Create a new user account", responses: { "201": { description: "Created" } } } }, "/products": { get: { summary: "List products", operationId: "listProducts", tags: ["products"], description: "Get all products from catalog", responses: { "200": { description: "Success" } } } }, "/users/{id}": { get: { summary: "Get user by ID", operationId: "getUserById", tags: ["users"], description: "Fetch a single user by their unique identifier", parameters: [ { name: "id", in: "path", required: true, schema: { type: "string" } } ], responses: { "200": { description: "Success" } } } } } }); const doc = "Some general documentation text."; const result = documentationSearch.extractRelevantSections(doc, "users account identifier", 5, 2000, openApiSpec); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("[GET /users]"); expect(result).toContain("listUsers"); // Should not include products endpoint since search terms only match user-related operations expect(result).not.toContain("products"); expect(result).not.toContain("listProducts"); }); it('should match operations by path, method, operationId, and description', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/auth/login": { post: { summary: "User login", operationId: "loginUser", tags: ["authentication"], description: "Authenticate 
user with credentials and return token", responses: { "200": { description: "Success" } } } }, "/auth/logout": { post: { summary: "User logout", operationId: "logoutUser", tags: ["authentication"], description: "Invalidate user session token", responses: { "200": { description: "Success" } } } }, "/users/profile": { get: { summary: "Get profile", operationId: "getProfile", tags: ["users"], description: "Get current user profile", responses: { "200": { description: "Success" } } } } } }); // Search by path component const result1 = documentationSearch.extractRelevantSections("", "auth login", 5, 2000, openApiSpec); expect(result1).toContain("[POST /auth/login]"); expect(result1).toContain("loginUser"); expect(result1).toContain("Authenticate user with credentials"); // Search by operationId const result2 = documentationSearch.extractRelevantSections("", "logoutUser", 5, 2000, openApiSpec); expect(result2).toContain("[POST /auth/logout]"); expect(result2).toContain("logoutUser"); // Search by tag const result3 = documentationSearch.extractRelevantSections("", "authentication", 5, 2000, openApiSpec); expect(result3).toContain("authentication"); expect(result3).toContain("login"); }); it('should limit number of returned operations based on maxSections', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/users": { get: { summary: "List users", operationId: "listUsers", description: "Get all users in the system", responses: { "200": { description: "Success" } } }, post: { summary: "Create user", operationId: "createUser", description: "Create a new user account", responses: { "201": { description: "Created" } } } }, "/users/{id}": { get: { summary: "Get user", operationId: "getUser", description: "Retrieve user by ID", responses: { "200": { description: "Success" } } }, put: { summary: "Update user", operationId: "updateUser", description: "Update user information", responses: { "200": { description: 
"Success" } } }, delete: { summary: "Delete user", operationId: "deleteUser", description: "Remove user from system", responses: { "204": { description: "Deleted" } } } } } }); // With maxSections=2, should only get top 2 matching operations const result = documentationSearch.extractRelevantSections("", "users", 2, 2000, openApiSpec); expect(result).toContain("=== OPENAPI OPERATIONS ==="); // Count operation delimiters to verify we got limited results const operationCount = (result.match(/\[(?:GET|POST|PUT|DELETE) /g) || []).length; expect(operationCount).toBeLessThanOrEqual(2); }); it('should handle OpenAPI spec with parameters in operations', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/search": { get: { summary: "Search items", operationId: "searchItems", description: "Search for items using query parameters", parameters: [ { name: "query", in: "query", required: true, schema: { type: "string" } }, { name: "limit", in: "query", schema: { type: "integer" } }, { name: "offset", in: "query", schema: { type: "integer" } } ], responses: { "200": { description: "Success" } } } } } }); // Search should match parameter names const result = documentationSearch.extractRelevantSections("", "query limit search", 5, 2000, openApiSpec); expect(result).toContain("[GET /search]"); expect(result).toContain("searchItems"); expect(result).toContain("query"); expect(result).toContain("limit"); }); it('should combine documentation sections with OpenAPI operations', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", description: "List all users", responses: { "200": { description: "Success" } } } } } }); const doc = "This documentation explains how to use the users endpoint. " + "The users API allows you to manage user accounts. 
" + "You can list, create, update, and delete users."; const result = documentationSearch.extractRelevantSections(doc, "users", 5, 2000, openApiSpec); // Should contain both documentation sections and OpenAPI operations // Note: === DOCUMENTATION === header is only added when security info is present expect(result).toContain("users endpoint"); expect(result).toContain("manage user accounts"); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("[GET /users]"); expect(result).toContain("getUsers"); }); it('should add DOCUMENTATION header when security info is also present', () => { const openApiSpec = JSON.stringify({ openapi: "3.0.0", info: { title: "Test API", version: "1.0.0" }, components: { securitySchemes: { bearerAuth: { type: "http", scheme: "bearer" } } }, security: [{ bearerAuth: [] }], paths: { "/users": { get: { summary: "Get users", operationId: "getUsers", description: "List all users", responses: { "200": { description: "Success" } } } } } }); const doc = "This documentation explains the API. 
Users endpoint allows managing accounts."; const result = documentationSearch.extractRelevantSections(doc, "users authentication", 5, 2000, openApiSpec); // With security keywords, should have all three sections with headers expect(result).toContain("=== SECURITY ==="); expect(result).toContain("bearerAuth"); expect(result).toContain("=== DOCUMENTATION ==="); expect(result).toContain("Users endpoint"); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("[GET /users]"); }); it('should handle Google Discovery schema format', () => { const googleDiscoverySpec = JSON.stringify({ kind: "discovery#restDescription", name: "testapi", version: "v1", resources: { users: { methods: { list: { id: "testapi.users.list", path: "users", httpMethod: "GET", description: "Lists all users in the system", parameters: { maxResults: { type: "integer", location: "query" } } }, insert: { id: "testapi.users.insert", path: "users", httpMethod: "POST", description: "Creates a new user" } } } } }); const result = documentationSearch.extractRelevantSections("", "users list", 5, 2000, googleDiscoverySpec); expect(result).toContain("=== OPENAPI OPERATIONS ==="); expect(result).toContain("testapi.users.list"); expect(result).toContain("Lists all users"); expect(result).toContain("GET"); }); }); }); describe('Sitemap and URL Ranking', () => { let strategy: PlaywrightFetchingStrategy; beforeEach(() => { strategy = new PlaywrightFetchingStrategy(); vi.clearAllMocks(); mockedAxios = axios as Mocked<typeof axios>; mockedAxios.get.mockReset(); mockedAxios.post.mockReset(); ({ mockPage, mockContext, mockBrowser } = createPlaywrightMocks()); }); describe('rankItems', () => { it('should filter out URLs with excluded keywords', () => { const urls = [ 'https://api.com/docs/getting-started', 'https://api.com/pricing', 'https://api.com/docs/authentication', 'https://api.com/signup', 'https://api.com/blog/updates' ]; const keywords = ['docs', 'authentication']; const ranked = 
strategy.rankItems(urls, keywords); // Should exclude pricing, signup, and blog completely expect(ranked).toHaveLength(2); expect(ranked[0]).toBe('https://api.com/docs/authentication'); expect(ranked[1]).toBe('https://api.com/docs/getting-started'); expect(ranked).not.toContain('https://api.com/pricing'); expect(ranked).not.toContain('https://api.com/signup'); expect(ranked).not.toContain('https://api.com/blog/updates'); }); it('should rank URLs by keyword match count divided by URL length', () => { const urls = [ 'https://example.com/v1/users/read/fast', // No 'api' in domain, 1 match 'https://api.com/documentation/api/v1/users/endpoints', // Long, 2 matches 'https://api.com/api/users', // Short, 2 matches ]; const keywords = ['api', 'users']; const ranked = strategy.rankItems(urls, keywords) as string[]; // api/users should rank highest (2 matches, shortest URL with api) expect(ranked[0]).toBe('https://api.com/api/users'); // Long URL with 2 matches should be second expect(ranked[1]).toBe('https://api.com/documentation/api/v1/users/endpoints'); // URL with only 1 match should be last expect(ranked[2]).toBe('https://example.com/v1/users/read/fast'); }); it('should handle link objects with text', () => { const links = [ { linkText: 'API Reference', href: 'https://api.com/reference' }, { linkText: 'Getting Started', href: 'https://api.com/start' }, { linkText: 'Pricing Plans', href: 'https://api.com/pricing' } ]; const keywords = ['api', 'reference']; const ranked = strategy.rankItems(links, keywords); expect(ranked).toHaveLength(2); // Pricing excluded completely expect(ranked[0]).toEqual({ linkText: 'API Reference', href: 'https://api.com/reference' }); expect(ranked[1]).toEqual({ linkText: 'Getting Started', href: 'https://api.com/start' }); expect(ranked).not.toContainEqual({ linkText: 'Pricing Plans', href: 'https://api.com/pricing' }); }); it('should filter already fetched links when provided', () => { const urls = [ 'https://api.com/docs/intro', 
'https://api.com/docs/api', 'https://api.com/docs/guide' ];
        const fetchedLinks = new Set(['https://api.com/docs/intro']);
        const keywords = ['docs'];
        const ranked = strategy.rankItems(urls, keywords, fetchedLinks) as string[];

        expect(ranked).toHaveLength(2);
        expect(ranked).not.toContain('https://api.com/docs/intro');
      });
    });

    /**
     * Sitemap-driven fetching: every test stubs `axios.get` (sitemap downloads)
     * and `mockPage.evaluate` (page extraction), runs
     * `DocumentationFetcher.fetchAndProcess()`, and inspects the mocks and/or
     * the returned text.
     */
    describe('Sitemap fetching', () => {
      const XML_PROLOG = '<?xml version="1.0" encoding="UTF-8"?>';
      const URLSET_OPEN = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
      const URLSET_CLOSE = '</urlset>';

      /** Joins XML fragments with single spaces into one document string. */
      const spaced = (...fragments: string[]): string => fragments.join(' ');

      /** Renders one `<url>` sitemap entry for the given location. */
      const urlEntry = (loc: string): string => `<url><loc>${loc}</loc></url>`;

      /** Builds a complete `<urlset>` sitemap listing the given locations. */
      const urlsetOf = (...locs: string[]): string =>
        spaced(XML_PROLOG, URLSET_OPEN, ...locs.map(urlEntry), URLSET_CLOSE);

      /** Shape returned by the mocked `page.evaluate` for a page with no links. */
      const pageSnapshot = (text: string) => ({
        html: `<html><body>${text}</body></html>`,
        textContent: text,
        links: {},
      });

      /**
       * Makes `axios.get` resolve with `xml` for any URL containing
       * `sitemap.xml` and reject with a `404` error for everything else.
       */
      const serveSitemap = (xml: string): void => {
        mockedAxios.get.mockImplementation((url: string) =>
          url.includes('sitemap.xml')
            ? Promise.resolve({ data: xml })
            : Promise.reject(new Error('404')),
        );
      };

      /** Creates the fetcher under test with empty credentials. */
      const buildFetcher = (documentationUrl: string, keywords: string[]) =>
        new DocumentationFetcher({ documentationUrl, keywords }, {}, metadata);

      it('should fetch and parse XML sitemap', async () => {
        serveSitemap(
          urlsetOf(
            'https://api.com/docs/intro',
            'https://api.com/docs/auth',
            'https://api.com/pricing',
            'https://api.com/docs/api',
          ),
        );
        mockPage.evaluate.mockResolvedValue(pageSnapshot('Content'));

        const fetcher = buildFetcher('https://api.com/docs', ['api', 'auth']);
        const result = await fetcher.fetchAndProcess();

        // A sitemap URL must have been requested.
        expect(mockedAxios.get).toHaveBeenCalledWith(
          expect.stringContaining('sitemap.xml'),
          expect.any(Object),
        );

        // At least one page was visited and its content reached the output.
        // NOTE(review): the pricing URL is presumably filtered out by the
        // implementation, but nothing here asserts that.
        expect(mockPage.goto).toHaveBeenCalled();
        expect(result).toContain('Content');
      });

      it('should handle sitemap index with nested sitemaps', async () => {
        const sitemapIndex = spaced(
          XML_PROLOG,
          '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
          '<sitemap><loc>https://api.com/docs/sitemap.xml</loc></sitemap>',
          '<sitemap><loc>https://api.com/blog/sitemap.xml</loc></sitemap>',
          '</sitemapindex>',
        );
        const docsSitemap = urlsetOf('https://api.com/docs/intro', 'https://api.com/docs/api');

        // The index is served at the root candidates; only the docs child
        // sitemap exists, every other request fails.
        mockedAxios.get.mockImplementation((url: string) => {
          const wantsIndex =
            url.includes('sitemap_index.xml') || url === 'https://api.com/sitemap.xml';
          if (wantsIndex) {
            return Promise.resolve({ data: sitemapIndex });
          }
          return url.includes('docs/sitemap.xml')
            ? Promise.resolve({ data: docsSitemap })
            : Promise.reject(new Error('404'));
        });
        mockPage.evaluate.mockResolvedValue(pageSnapshot('Docs'));

        const fetcher = buildFetcher('https://api.com/docs', ['docs']);
        const result = await fetcher.fetchAndProcess();

        // Some sitemap URL was requested and page content reached the output.
        // NOTE(review): the blog sitemap is presumably skipped by filtering,
        // but this test does not assert it.
        expect(mockedAxios.get).toHaveBeenCalledWith(
          expect.stringContaining('sitemap'),
          expect.any(Object),
        );
        expect(result).toContain('Docs');
      });

      it('should fall back to legacy crawling if no sitemap found', async () => {
        // Every sitemap request fails.
        mockedAxios.get.mockRejectedValue(new Error('404'));

        // The first evaluated page exposes two outgoing links.
        mockPage.evaluate.mockResolvedValueOnce({
          ...pageSnapshot('Main Page'),
          links: {
            'api reference https docs api': 'https://api.com/docs/api',
            'getting started https docs start': 'https://api.com/docs/start',
          },
        });

        const fetcher = buildFetcher('https://api.com/docs', ['api']);
        const result = await fetcher.fetchAndProcess();

        // The page was still visited and its content returned.
        expect(mockPage.goto).toHaveBeenCalled();
        expect(result).toContain('Main Page');
      });

      it('should respect MAX_FETCHED_LINKS limit', async () => {
        // 100 entries concatenated without separators, then wrapped in a urlset.
        const urls = Array.from({ length: 100 }, (_, i) =>
          urlEntry(`https://api.com/docs/page${i}`),
        ).join('');
        serveSitemap(spaced(XML_PROLOG, URLSET_OPEN, urls, URLSET_CLOSE));
        mockPage.evaluate.mockResolvedValue(pageSnapshot('Page'));

        const fetcher = buildFetcher('https://api.com/docs', ['docs']);
        await fetcher.fetchAndProcess();

        // Page visits must equal the configured cap exactly.
        expect(mockPage.goto).toHaveBeenCalledTimes(
          server_defaults.DOCUMENTATION.MAX_FETCHED_LINKS,
        );
      });

      it('should filter sitemap URLs by path relevance', async () => {
        serveSitemap(
          urlsetOf(
            'https://api.com/docs/api/intro',
            'https://api.com/company/about',
            'https://api.com/docs/api/auth',
            'https://api.com/marketing/landing',
          ),
        );
        mockPage.evaluate.mockResolvedValue(pageSnapshot('Content'));

        // Keywords chosen to match the two docs URLs.
        const fetcher = buildFetcher('https://api.com/docs/api', ['intro', 'auth']);
        await fetcher.fetchAndProcess();

        // First argument of every `page.goto` call.
        const calledUrls: string[] = mockPage.goto.mock.calls.map(
          (call: [string, ...unknown[]]) => call[0],
        );
        expect(calledUrls.length).toBeGreaterThan(0);

        // At least one visited URL is one of the relevant docs pages.
        const visitedRelevantPage = calledUrls.some(
          (url) => url.includes('intro') || url.includes('auth'),
        );
        expect(visitedRelevantPage).toBe(true);
      });

      it('should deduplicate similar page content based on similarity threshold', async () => {
        serveSitemap(
          urlsetOf(
            'https://api.com/docs/page1',
            'https://api.com/docs/page2',
            'https://api.com/docs/page3',
            'https://api.com/docs/page4',
          ),
        );

        // Two distinct bodies; the duplicate is the same text as the first.
        const uniqueContent1 =
          'Authentication API documentation with bearer token support and OAuth flows for secure access ' +
          'x'.repeat(500);
        const duplicateContent = uniqueContent1;
        const uniqueContent2 =
          'Completely different content about webhooks and event subscriptions for real-time updates ' +
          'y'.repeat(500);

        // Served in order; any call past the third gets the duplicate again.
        const scriptedPages = [uniqueContent1, duplicateContent, uniqueContent2];
        let callCount = 0;
        mockPage.evaluate.mockImplementation(() => {
          const text = scriptedPages[callCount] ?? duplicateContent;
          callCount += 1;
          return Promise.resolve(pageSnapshot(text));
        });

        const fetcher = buildFetcher('https://api.com/docs', ['api']);
        const result = await fetcher.fetchAndProcess();

        // More than one page was evaluated.
        expect(mockPage.goto).toHaveBeenCalled();
        expect(callCount).toBeGreaterThan(1);

        // Both distinct bodies are present in the output.
        expect(result).toContain('Authentication API documentation');
        expect(result).toContain('webhooks and event subscriptions');

        // The duplicated body appears exactly once.
        const occurrences =
          result.match(/Authentication API documentation with bearer token support/g)?.length ?? 0;
        expect(occurrences).toBe(1);
      });
    });
  });
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/superglue-ai/superglue'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.