MCP Server for Crawl4AI

MIT License

Overview | Inspect (New) | Endpoints | Schema | Related Servers | Reviews | Score

parse-sitemap.integration.test.ts • 6.71 kB

/* eslint-env jest */
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { createTestClient, cleanupTestClient, TEST_TIMEOUTS } from './test-utils.js';

/**
 * Minimal shape of a tool-call result as read by this suite: a list of
 * content items, of which the tests only inspect those with `type === 'text'`.
 */
interface ToolResult {
  content: Array<{
    type: string;
    text?: string;
  }>;
}

/**
 * Integration tests for the `parse_sitemap` tool.
 *
 * Every test calls the tool through a shared client with a public URL, so the
 * outcomes presumably depend on network access and on the current contents of
 * those sites (NOTE(review): confirm the server fetches the URLs live).
 */
describe('parse_sitemap Integration Tests', () => {
  let client: Client;

  beforeAll(async () => {
    client = await createTestClient();
  }, TEST_TIMEOUTS.medium);

  afterAll(async () => {
    // Guard against beforeAll having failed before the client was assigned.
    if (client) {
      await cleanupTestClient(client);
    }
  });

  describe('Basic functionality', () => {
    it(
      'should parse nodejs.org sitemap successfully',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://nodejs.org/sitemap.xml',
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        expect(content).toBeDefined();
        expect(Array.isArray(content)).toBe(true);
        expect(content.length).toBeGreaterThan(0);

        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
        expect(textContent?.text).toContain('Sitemap parsed successfully');
        expect(textContent?.text).toContain('Total URLs found:');
        expect(textContent?.text).toContain('https://nodejs.org');

        // Should find many URLs in the nodejs sitemap
        // (the pattern requires a count of at least two digits, i.e. >= 10).
        expect(textContent?.text).toMatch(/Total URLs found: [1-9][0-9]+/);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should filter URLs with regex pattern',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://nodejs.org/sitemap.xml',
            filter_pattern: '.*/learn/.*', // Only URLs containing /learn/
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();

        // Check that filtering worked
        expect(textContent?.text).toContain('Filtered URLs:');

        // All URLs in the result should contain /learn/
        // NOTE(review): when the response has no 'URLs:\n' section, the loop
        // below is skipped and no per-URL assertion runs.
        const urlsSection = textContent?.text?.split('URLs:\n')[1];
        if (urlsSection) {
          const urls = urlsSection.split('\n').filter((url) => url.trim());
          urls.forEach((url) => {
            // Lines containing '... and' are excluded from the /learn/ check.
            if (url && !url.includes('... and')) {
              expect(url).toContain('/learn/');
            }
          });
        }
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle empty sitemaps',
      async () => {
        // Using a URL that returns valid XML but not a sitemap
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://www.w3schools.com/xml/note.xml',
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
        expect(textContent?.text).toContain('Total URLs found: 0');
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle large sitemaps with truncation',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://nodejs.org/sitemap.xml',
            filter_pattern: '.*', // Match all to test truncation
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();

        // Should show max 100 URLs and indicate there are more
        // NOTE(review): the format check only runs when '... and' is present;
        // a response without it passes without any truncation assertion.
        if (textContent?.text && textContent.text.includes('... and')) {
          expect(textContent.text).toMatch(/\.\.\. and \d+ more/);
        }
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Error handling', () => {
    it(
      'should handle invalid URLs',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'not-a-url',
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        expect(content).toBeDefined();
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
        expect(textContent?.text).toContain('Error');
        expect(textContent?.text?.toLowerCase()).toContain('invalid');
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle non-existent URLs',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://this-domain-definitely-does-not-exist-12345.com/sitemap.xml',
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
        expect(textContent?.text).toContain('Error');
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle non-XML content',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://example.com', // HTML page, not XML
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();

        // Should still parse but likely find 0 URLs since it's not a sitemap
        expect(textContent?.text).toContain('Total URLs found:');
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle invalid regex patterns',
      async () => {
        const result = await client.callTool({
          name: 'parse_sitemap',
          arguments: {
            url: 'https://nodejs.org/sitemap.xml',
            filter_pattern: '[invalid(regex', // Invalid regex
          },
        });

        expect(result).toBeDefined();
        const content = (result as ToolResult).content;
        const textContent = content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
        expect(textContent?.text).toContain('Error');
        // NOTE(review): any text containing 'Error' already satisfies this
        // pattern once lower-cased, so this assertion adds no extra constraint.
        expect(textContent?.text?.toLowerCase()).toMatch(/failed|error|invalid/);
      },
      TEST_TIMEOUTS.medium,
    );
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/omgwtfwow/mcp-crawl4ai-ts'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.