Skip to main content
Glama

documcp

by tosin2013
sitemap-generator.test.ts (19.2 kB)
/**
 * Tests for sitemap-generator utility
 *
 * Every test runs against a fresh temporary directory containing a `docs`
 * folder; the directory is removed again after each test.
 */

import { promises as fs } from "fs";
import path from "path";
import { tmpdir } from "os";
import {
  generateSitemap,
  parseSitemap,
  validateSitemap,
  updateSitemap,
  listSitemapUrls,
  type SitemapUrl,
  type SitemapOptions,
} from "../../src/utils/sitemap-generator.js";

describe("sitemap-generator", () => {
  const BASE_URL = "https://example.com";
  const XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>';
  const URLSET_OPEN =
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';

  let testDir: string;
  let docsDir: string;

  /**
   * Builds the options shared by almost every test: the example base URL,
   * the per-test docs directory and git history disabled.
   *
   * @param overrides - Fields that replace or extend the defaults.
   */
  const sitemapOptions = (
    overrides: Partial<SitemapOptions> = {},
  ): SitemapOptions => ({
    baseUrl: BASE_URL,
    docsPath: docsDir,
    useGitHistory: false,
    ...overrides,
  });

  /**
   * Writes a file below the docs directory, creating parent directories
   * as needed.
   *
   * @param relativePath - Path relative to the docs directory.
   * @param content - File content.
   */
  const writeDoc = async (
    relativePath: string,
    content: string,
  ): Promise<void> => {
    const target = path.join(docsDir, relativePath);
    await fs.mkdir(path.dirname(target), { recursive: true });
    await fs.writeFile(target, content);
  };

  /**
   * Renders one `<url>` entry from its child elements.
   *
   * @param elements - Child elements such as `<loc>…</loc>`.
   */
  const urlEntry = (...elements: string[]): string =>
    ["  <url>", ...elements.map((element) => `    ${element}`), "  </url>"].join(
      "\n",
    );

  /**
   * Wraps `<url>` entries in the XML declaration and `<urlset>` element.
   *
   * @param entries - Entries produced by `urlEntry`.
   */
  const sitemapXml = (...entries: string[]): string =>
    [XML_DECLARATION, URLSET_OPEN, ...entries, "</urlset>"].join("\n");

  /**
   * Writes a sitemap fixture into the test directory.
   *
   * @param content - Raw file content.
   * @param fileName - File name inside the test directory.
   * @returns Absolute path of the written file.
   */
  const writeSitemapFile = async (
    content: string,
    fileName = "sitemap.xml",
  ): Promise<string> => {
    const sitemapPath = path.join(testDir, fileName);
    await fs.writeFile(sitemapPath, content);
    return sitemapPath;
  };

  beforeEach(async () => {
    // mkdtemp appends a random suffix, so parallel workers or tests that
    // start within the same millisecond can never share a directory.
    testDir = await fs.mkdtemp(path.join(tmpdir(), "sitemap-test-"));
    docsDir = path.join(testDir, "docs");
    await fs.mkdir(docsDir, { recursive: true });
  });

  afterEach(async () => {
    // Clean up test directory
    try {
      await fs.rm(testDir, { recursive: true, force: true });
    } catch {
      // Cleanup is best-effort: a leftover temp directory must not fail a test.
    }
  });

  describe("generateSitemap", () => {
    it("should generate sitemap.xml from documentation files", async () => {
      // Create test documentation structure
      await writeDoc("index.md", "# Home\n\nWelcome to the docs!");
      await writeDoc(
        path.join("tutorials", "getting-started.md"),
        "# Getting Started\n\nStart here.",
      );
      await writeDoc(
        path.join("reference", "api.md"),
        "# API Reference\n\nAPI documentation.",
      );

      const options: SitemapOptions = sitemapOptions();
      const result = await generateSitemap(options);

      expect(result.xml).toContain(XML_DECLARATION);
      expect(result.xml).toContain(URLSET_OPEN);
      expect(result.urls).toHaveLength(3);
      expect(result.stats.totalUrls).toBe(3);
      expect(result.stats.byCategory).toHaveProperty("home");
      expect(result.stats.byCategory).toHaveProperty("tutorial");
      expect(result.stats.byCategory).toHaveProperty("reference");
    });

    it("should generate URLs with correct priorities based on categories", async () => {
      await writeDoc(path.join("tutorials", "guide.md"), "# Tutorial");
      await writeDoc(path.join("reference", "api.md"), "# Reference");

      const result = await generateSitemap(sitemapOptions());

      const tutorialUrl = result.urls.find((u) => u.category === "tutorial");
      const referenceUrl = result.urls.find((u) => u.category === "reference");

      expect(tutorialUrl?.priority).toBe(1.0); // Highest priority
      expect(referenceUrl?.priority).toBe(0.8);
    });

    it("should handle empty documentation directory", async () => {
      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(0);
      expect(result.stats.totalUrls).toBe(0);
    });

    it("should exclude node_modules and other excluded patterns", async () => {
      await writeDoc(path.join("node_modules", "package.md"), "# Package");
      await writeDoc("guide.md", "# Guide");

      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).toContain("guide.html");
    });

    it("should convert markdown extensions to html", async () => {
      await writeDoc("page.md", "# Page");
      await writeDoc("component.mdx", "# Component");

      const result = await generateSitemap(sitemapOptions());

      // Assert the count first so a missing page is reported as a failed
      // expectation instead of a TypeError on an undefined array element.
      expect(result.urls).toHaveLength(2);
      for (const url of result.urls) {
        expect(url.loc).toContain(".html");
      }
      expect(result.urls.some((u) => u.loc.endsWith(".md"))).toBe(false);
      expect(result.urls.some((u) => u.loc.endsWith(".mdx"))).toBe(false);
    });

    it("should extract title from markdown frontmatter", async () => {
      // Joined line by line so the frontmatter delimiters always start at
      // column 0 regardless of how this file is indented.
      const content = [
        "---",
        "title: My Custom Title",
        "---",
        "",
        "# Main Heading",
        "",
        "Content here.",
      ].join("\n");
      await writeDoc("page.md", content);

      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].title).toBe("My Custom Title");
    });

    it("should extract title from markdown heading", async () => {
      await writeDoc("page.md", "# Page Title\n\nContent");

      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].title).toBe("Page Title");
    });

    it("should handle custom include and exclude patterns", async () => {
      await writeDoc("page.md", "# Markdown");
      await writeDoc("page.html", "<h1>HTML</h1>");
      await writeDoc("page.txt", "Text");

      const result = await generateSitemap(
        sitemapOptions({
          includePatterns: ["**/*.md"],
          excludePatterns: [],
        }),
      );

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).toContain("page.html");
    });
  });

  describe("parseSitemap", () => {
    it("should parse existing sitemap.xml", async () => {
      const xml = sitemapXml(
        urlEntry(
          "<loc>https://example.com/page1.html</loc>",
          "<lastmod>2025-01-01</lastmod>",
          "<changefreq>monthly</changefreq>",
          "<priority>0.8</priority>",
        ),
        urlEntry(
          "<loc>https://example.com/page2.html</loc>",
          "<lastmod>2025-01-02</lastmod>",
          "<changefreq>weekly</changefreq>",
          "<priority>1.0</priority>",
        ),
      );
      const sitemapPath = await writeSitemapFile(xml);

      const urls = await parseSitemap(sitemapPath);

      expect(urls).toHaveLength(2);
      expect(urls[0].loc).toBe("https://example.com/page1.html");
      expect(urls[0].lastmod).toBe("2025-01-01");
      expect(urls[0].changefreq).toBe("monthly");
      expect(urls[0].priority).toBe(0.8);
      expect(urls[1].loc).toBe("https://example.com/page2.html");
    });

    it("should handle XML special characters", async () => {
      const xml = sitemapXml(
        urlEntry("<loc>https://example.com/page?id=1&amp;type=test</loc>"),
      );
      const sitemapPath = await writeSitemapFile(xml);

      const urls = await parseSitemap(sitemapPath);

      expect(urls).toHaveLength(1);
      expect(urls[0].loc).toBe("https://example.com/page?id=1&type=test");
    });

    // Title corrected: the assertion below checks that nothing is thrown and
    // an empty list is returned for unparseable content.
    it("should return an empty list for invalid sitemap", async () => {
      const sitemapPath = await writeSitemapFile("not xml", "invalid.xml");

      const urls = await parseSitemap(sitemapPath);

      expect(urls).toHaveLength(0); // Graceful handling of invalid XML
    });
  });

  describe("validateSitemap", () => {
    it("should validate correct sitemap", async () => {
      const xml = sitemapXml(
        urlEntry(
          "<loc>https://example.com/page.html</loc>",
          "<lastmod>2025-01-01</lastmod>",
          "<changefreq>monthly</changefreq>",
          "<priority>0.8</priority>",
        ),
      );
      const sitemapPath = await writeSitemapFile(xml);

      const result = await validateSitemap(sitemapPath);

      expect(result.valid).toBe(true);
      expect(result.errors).toHaveLength(0);
      expect(result.urlCount).toBe(1);
    });

    it("should detect missing loc element", async () => {
      const xml = sitemapXml(urlEntry("<lastmod>2025-01-01</lastmod>"));
      const sitemapPath = await writeSitemapFile(xml);

      const result = await validateSitemap(sitemapPath);

      expect(result.valid).toBe(false);
      expect(result.errors.some((e) => e.includes("Missing <loc>"))).toBe(true);
    });

    it("should detect invalid priority", async () => {
      const xml = sitemapXml(
        urlEntry(
          "<loc>https://example.com/page.html</loc>",
          "<priority>1.5</priority>",
        ),
      );
      const sitemapPath = await writeSitemapFile(xml);

      const result = await validateSitemap(sitemapPath);

      expect(result.valid).toBe(false);
      expect(
        result.errors.some((e) =>
          e.includes("Priority must be between 0.0 and 1.0"),
        ),
      ).toBe(true);
    });

    it("should detect invalid protocol", async () => {
      const xml = sitemapXml(urlEntry("<loc>ftp://example.com/page.html</loc>"));
      const sitemapPath = await writeSitemapFile(xml);

      const result = await validateSitemap(sitemapPath);

      expect(result.valid).toBe(false);
      expect(result.errors.some((e) => e.includes("Invalid protocol"))).toBe(
        true,
      );
    });

    it("should return error if sitemap does not exist", async () => {
      const sitemapPath = path.join(testDir, "nonexistent.xml");

      const result = await validateSitemap(sitemapPath);

      expect(result.valid).toBe(false);
      expect(result.errors.some((e) => e.includes("does not exist"))).toBe(
        true,
      );
    });
  });

  describe("updateSitemap", () => {
    it("should update existing sitemap with new pages", async () => {
      // Create initial sitemap
      const xml = sitemapXml(
        urlEntry(
          "<loc>https://example.com/page1.html</loc>",
          "<lastmod>2025-01-01</lastmod>",
          "<priority>0.8</priority>",
        ),
      );
      const sitemapPath = await writeSitemapFile(xml);

      // Add new documentation file
      await writeDoc("page1.md", "# Page 1");
      await writeDoc("page2.md", "# Page 2");

      const changes = await updateSitemap(sitemapPath, sitemapOptions());

      expect(changes.added).toBe(1); // page2.md is new
      expect(changes.total).toBe(2);
    });

    it("should detect removed pages", async () => {
      // Create initial sitemap with 2 URLs
      const xml = sitemapXml(
        urlEntry("<loc>https://example.com/page1.html</loc>"),
        urlEntry("<loc>https://example.com/page2.html</loc>"),
      );
      const sitemapPath = await writeSitemapFile(xml);

      // Only create page1.md
      await writeDoc("page1.md", "# Page 1");

      const changes = await updateSitemap(sitemapPath, sitemapOptions());

      expect(changes.removed).toBe(1); // page2.html was removed
      expect(changes.total).toBe(1);
    });

    it("should create new sitemap if none exists", async () => {
      const sitemapPath = path.join(testDir, "sitemap.xml");
      await writeDoc("page.md", "# Page");

      const changes = await updateSitemap(sitemapPath, sitemapOptions());

      expect(changes.added).toBe(1);
      expect(changes.total).toBe(1);

      // Verify sitemap was created: fs.access resolves (with undefined) only
      // when the path exists and is accessible.
      await expect(fs.access(sitemapPath)).resolves.toBeUndefined();
    });
  });

  describe("listSitemapUrls", () => {
    it("should list all URLs from sitemap", async () => {
      const xml = sitemapXml(
        urlEntry(
          "<loc>https://example.com/page1.html</loc>",
          "<priority>0.9</priority>",
        ),
        urlEntry(
          "<loc>https://example.com/page2.html</loc>",
          "<priority>0.8</priority>",
        ),
      );
      const sitemapPath = await writeSitemapFile(xml);

      const urls = await listSitemapUrls(sitemapPath);

      expect(urls).toHaveLength(2);
      expect(urls[0].loc).toBe("https://example.com/page1.html");
      expect(urls[1].loc).toBe("https://example.com/page2.html");
    });
  });

  describe("edge cases", () => {
    it("should handle deeply nested directory structures", async () => {
      await writeDoc(path.join("a", "b", "c", "d", "deep.md"), "# Deep Page");

      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).toContain("a/b/c/d/deep.html");
    });

    it("should handle files with special characters in names", async () => {
      await writeDoc("my-page-2024.md", "# Page");

      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).toContain("my-page-2024.html");
    });

    it("should handle index.html correctly", async () => {
      await writeDoc("index.md", "# Home");

      const result = await generateSitemap(sitemapOptions());

      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).toBe("https://example.com/");
    });

    it("should exclude directories matching exclusion patterns", async () => {
      // Create directory structure with excluded dirs
      await writeDoc(
        path.join("node_modules", "package.md"),
        "# Should be excluded",
      );
      await writeDoc(path.join("valid", "page.md"), "# Valid Page");

      const result = await generateSitemap(sitemapOptions());

      // Should only include valid directory, not node_modules
      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).toContain("valid/page");
    });

    it("should handle directory scan errors gracefully", async () => {
      // NOTE(review): this test does not actually provoke a scan error (for
      // example an unreadable sub-directory); it only checks the happy path.
      await writeDoc("valid.md", "# Valid");

      const result = await generateSitemap(sitemapOptions());

      // Should succeed despite potential permission issues
      expect(result.urls.length).toBeGreaterThanOrEqual(1);
    });

    it("should categorize explanation pages correctly", async () => {
      await writeDoc(path.join("explanation", "concepts.md"), "# Concepts");

      const result = await generateSitemap(sitemapOptions());

      expect(result.stats.byCategory).toHaveProperty("explanation");
      expect(result.stats.byCategory.explanation).toBeGreaterThan(0);
    });

    it("should fall back to file system date when git fails", async () => {
      await writeDoc("no-git.md", "# No Git");

      const result = await generateSitemap(
        sitemapOptions({
          useGitHistory: true, // Try git but will fall back
        }),
      );

      // Should still have a lastmod date from file system
      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].lastmod).toBeDefined();
      expect(result.urls[0].lastmod).toMatch(/^\d{4}-\d{2}-\d{2}$/);
    });

    it("should handle files without extensions", async () => {
      await writeDoc("README", "# Readme");

      const result = await generateSitemap(
        sitemapOptions({
          includePatterns: ["**/*"], // Include all files
        }),
      );

      // Whether the extensionless file is listed is left to the
      // implementation; the call must succeed and report a URL count that
      // matches the returned list. (The previous `>= 0` check on an array
      // length could never fail.)
      expect(Array.isArray(result.urls)).toBe(true);
      expect(result.stats.totalUrls).toBe(result.urls.length);
    });

    it("should handle empty git timestamp", async () => {
      // Create file and generate sitemap with git enabled
      await writeDoc("test.md", "# Test");

      const result = await generateSitemap(
        sitemapOptions({ useGitHistory: true }),
      );

      // Should have valid dates even if git returns empty
      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].lastmod).toBeDefined();
    });

    it("should handle files in deeply excluded paths", async () => {
      await writeDoc(path.join(".git", "objects", "file.md"), "# Git Object");
      await writeDoc("valid.md", "# Valid");

      const result = await generateSitemap(sitemapOptions());

      // Should exclude .git directory
      expect(result.urls).toHaveLength(1);
      expect(result.urls[0].loc).not.toContain(".git");
    });
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/documcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.