Synergy/DE MCP Server

html-parser.test.ts•7.73 KiB

/**
 * Unit tests for HTML parser module
 *
 * Covers `parseHtml` (title, breadcrumbs, navigation links, main content and
 * topic metadata) and `extractBodyText` (Markdown-style rendering of headings,
 * lists and code blocks).
 *
 * Layout note: every statement sits on its own line. The `// Mock logger`
 * line comment below must end at a newline; if the file is collapsed onto a
 * single line, that comment swallows the `vi.mock` call and every test after
 * it on the same physical line.
 */

import { describe, it, expect, vi, beforeEach } from "vitest";
import { parseHtml, extractBodyText } from "../html-parser.js";
import * as cheerio from "cheerio";

// Mock logger
// Replaces the logger module with stubs exposing only `debug` and
// `logParsing`, the two methods this suite provides for the parser.
vi.mock("../../utils/logger.js", () => ({
  logger: {
    debug: vi.fn(),
    logParsing: vi.fn(),
  },
}));

describe("parseHtml", () => {
  // Title resolution order exercised here: <title>, then <h1>, then "Untitled".
  describe("title extraction", () => {
    it("should extract title from title tag", () => {
      const html =
        "<html><head><title>Test Title</title></head><body>Content</body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.title).toBe("Test Title");
    });

    it("should extract title from h1 if no title tag", () => {
      const html =
        "<html><body><h1>Main Heading</h1><p>Content</p></body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.title).toBe("Main Heading");
    });

    it("should fallback to 'Untitled' if no title found", () => {
      const html = "<html><body><p>Content</p></body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.title).toBe("Untitled");
    });
  });

  // Breadcrumbs are surfaced on `topic.path`.
  describe("breadcrumb extraction", () => {
    it("should extract breadcrumbs from .breadcrumb", () => {
      const html = `
        <html>
          <body>
            <nav class="breadcrumb">
              <a href="/">Home</a>
              <span>Section</span>
              <span>Topic</span>
            </nav>
            <main>Content</main>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.path.length).toBeGreaterThan(0);
      expect(topic.path).toContain("Section");
    });

    it("should return empty array if no breadcrumbs", () => {
      const html = "<html><body><main>Content</main></body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.path).toEqual([]);
    });
  });

  // Links are surfaced on `topic.links`, each tagged with a `type`.
  describe("navigation links", () => {
    it("should extract previous link", () => {
      const html = `
        <html>
          <body>
            <main>Content</main>
            <a rel="prev" href="/prev-topic">Previous</a>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      const prevLink = topic.links.find((l) => l.type === "prev");
      expect(prevLink).toBeDefined();
      expect(prevLink?.target_topic_id).toBeDefined();
    });

    it("should extract next link", () => {
      const html = `
        <html>
          <body>
            <main>Content</main>
            <a rel="next" href="/next-topic">Next</a>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      const nextLink = topic.links.find((l) => l.type === "next");
      expect(nextLink).toBeDefined();
    });

    it("should extract parent link", () => {
      // The fixture marks the link with rel="up"; the test looks it up as type "parent".
      const html = `
        <html>
          <body>
            <main>Content</main>
            <a rel="up" href="/parent-topic">Parent</a>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      const parentLink = topic.links.find((l) => l.type === "parent");
      expect(parentLink).toBeDefined();
    });

    it("should extract related links", () => {
      const html = `
        <html>
          <body>
            <main>Content</main>
            <div class="related">
              <a href="/related1">Related 1</a>
              <a href="/related2">Related 2</a>
            </div>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      const relatedLinks = topic.links.filter((l) => l.type === "related");
      expect(relatedLinks.length).toBeGreaterThan(0);
    });
  });

  // Main-content handling is observed through `topic.summary`.
  describe("main content extraction", () => {
    it("should extract content from main tag", () => {
      const html = `
        <html>
          <body>
            <header>Header</header>
            <main>Main content here</main>
            <footer>Footer</footer>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.summary).toContain("Main content");
    });

    it("should strip header and footer", () => {
      const html = `
        <html>
          <body>
            <header>Header content</header>
            <main>Main content</main>
            <footer>Footer content</footer>
          </body>
        </html>
      `;
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "online",
      });

      expect(topic.summary).not.toContain("Header content");
      expect(topic.summary).not.toContain("Footer content");
    });
  });

  // Metadata passed in the options object should be reflected on the topic.
  describe("topic structure", () => {
    it("should set correct topic ID", () => {
      const html = "<html><body><main>Content</main></body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic-id",
        source: "online",
      });

      expect(topic.id).toBeDefined();
    });

    it("should set version", () => {
      const html = "<html><body><main>Content</main></body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        version: "12.3",
        source: "online",
      });

      expect(topic.version).toBe("12.3");
    });

    it("should set source", () => {
      const html = "<html><body><main>Content</main></body></html>";
      const topic = parseHtml(html, {
        url: "http://example.com/topic",
        source: "local",
      });

      expect(topic.source).toBe("local");
    });
  });
});

// `extractBodyText` is given a loaded cheerio document rather than raw HTML.
describe("extractBodyText", () => {
  it("should extract text from main content", () => {
    const html = `
      <html>
        <body>
          <main>
            <h1>Heading</h1>
            <p>Paragraph text.</p>
          </main>
        </body>
      </html>
    `;
    const $ = cheerio.load(html);
    const bodyText = extractBodyText($);

    expect(bodyText).toContain("Heading");
    expect(bodyText).toContain("Paragraph text");
  });

  it("should convert headings to markdown", () => {
    const html = `
      <html>
        <body>
          <main>
            <h1>Level 1</h1>
            <h2>Level 2</h2>
          </main>
        </body>
      </html>
    `;
    const $ = cheerio.load(html);
    const bodyText = extractBodyText($);

    // The first pattern is anchored: the output must begin with the H1 line.
    expect(bodyText).toMatch(/^# Level 1/);
    expect(bodyText).toMatch(/## Level 2/);
  });

  it("should convert lists to markdown", () => {
    const html = `
      <html>
        <body>
          <main>
            <ul>
              <li>Item 1</li>
              <li>Item 2</li>
            </ul>
          </main>
        </body>
      </html>
    `;
    const $ = cheerio.load(html);
    const bodyText = extractBodyText($);

    expect(bodyText).toContain("- Item 1");
    expect(bodyText).toContain("- Item 2");
  });

  it("should preserve code blocks", () => {
    const html = `
      <html>
        <body>
          <main>
            <pre><code>code content</code></pre>
          </main>
        </body>
      </html>
    `;
    const $ = cheerio.load(html);
    const bodyText = extractBodyText($);

    expect(bodyText).toContain("```");
    expect(bodyText).toContain("code content");
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/h0ck3ystyx/synergyde-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

html-parser.test.ts•7.73 KiB