MCP Web Docs

Overview Schema Related Servers Score Discussions

default-extractor.test.ts•6.81 KiB

import { DefaultExtractor } from './default-extractor.js';
import { JSDOM } from 'jsdom';

/**
 * Test suite for DefaultExtractor.extractContent.
 *
 * Every case builds a jsdom Document from an HTML string, hands it to a fresh
 * extractor instance and asserts on the returned `content` / `metadata`.
 */
describe('DefaultExtractor', () => {
  let extractor: DefaultExtractor;

  // A new extractor per test keeps the cases independent of each other.
  beforeEach(() => {
    extractor = new DefaultExtractor();
  });

  /** Parses an HTML string with jsdom and returns the resulting Document. */
  const parseHtml = (markup: string): Document => new JSDOM(markup).window.document;

  /** Parses `markup` and passes the Document to the extractor under test. */
  const extractFrom = (markup: string) => extractor.extractContent(parseHtml(markup));

  describe('extractContent', () => {
    it('should extract content from main element', async () => {
      const markup = ` <html> <body> <nav>Navigation</nav> <main> <h1>Main Title</h1> <p>Main content here</p> </main> <footer>Footer</footer> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.content).toContain('Main Title');
      expect(extracted.content).toContain('Main content here');
      expect(extracted.content).not.toContain('Navigation');
      expect(extracted.content).not.toContain('Footer');
    });

    it('should extract content from article element', async () => {
      const markup = ` <html> <body> <header>Header</header> <article> <h1>Article Title</h1> <p>Article content</p> </article> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.content).toContain('Article Title');
      expect(extracted.content).toContain('Article content');
      expect(extracted.content).not.toContain('Header');
    });

    it('should extract content from role="main" element', async () => {
      const markup = ` <html> <body> <nav>Nav</nav> <div role="main"> <h1>Role Main Title</h1> <p>Role main content</p> </div> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.content).toContain('Role Main Title');
      expect(extracted.content).toContain('Role main content');
      expect(extracted.content).not.toContain('Nav');
    });

    it('should fall back to body when no main content element', async () => {
      const markup = ` <html> <body> <div> <h1>Page Title</h1> <p>Page content</p> </div> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.content).toContain('Page Title');
      expect(extracted.content).toContain('Page content');
    });

    it('should remove script and style elements', async () => {
      const markup = ` <html> <body> <style>.hidden { display: none; }</style> <script>console.log('secret');</script> <main> <h1>Visible Content</h1> </main> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.content).toContain('Visible Content');
      // Text that only appears inside <style>/<script> must not leak through.
      expect(extracted.content).not.toContain('hidden');
      expect(extracted.content).not.toContain('secret');
    });

    it('should extract title from h1', async () => {
      const markup = ` <html> <body> <main> <h1>Component Name</h1> <p>Description paragraph</p> </main> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.metadata.pattern?.name).toBe('Component Name');
    });

    it('should extract description from first paragraph after h1', async () => {
      const markup = ` <html> <body> <main> <h1>Component Name</h1> <p>This is the component description.</p> <p>This is additional content.</p> </main> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.metadata.pattern?.description).toBe('This is the component description.');
    });

    it('should return overview type metadata', async () => {
      const markup = '<html><body><main><h1>Test</h1></main></body></html>';

      const extracted = await extractFrom(markup);

      expect(extracted.metadata.type).toBe('overview');
    });

    it('should return component type pattern', async () => {
      const markup = '<html><body><main><h1>Test</h1></main></body></html>';

      const extracted = await extractFrom(markup);

      expect(extracted.metadata.pattern?.type).toBe('component');
    });

    it('should handle empty document', async () => {
      const markup = '<html><body></body></html>';

      const extracted = await extractFrom(markup);

      expect(extracted.content).toBe('');
      expect(extracted.metadata.type).toBe('overview');
    });

    it('should handle document with only whitespace', async () => {
      const markup = '<html><body> \n\n </body></html>';

      const extracted = await extractFrom(markup);

      expect(extracted.content).toBe('');
    });

    it('should initialize usageContexts and relatedPatterns as empty arrays', async () => {
      const markup = '<html><body><main><h1>Test</h1></main></body></html>';

      const extracted = await extractFrom(markup);

      expect(extracted.metadata.pattern?.usageContexts).toEqual([]);
      expect(extracted.metadata.pattern?.relatedPatterns).toEqual([]);
    });

    it('should handle missing h1', async () => {
      const markup = ` <html> <body> <main> <h2>Subheading</h2> <p>Content here</p> </main> </body> </html> `;

      const extracted = await extractFrom(markup);

      // With no <h1> the pattern name is expected to be the empty string.
      expect(extracted.metadata.pattern?.name).toBe('');
      expect(extracted.content).toContain('Content here');
    });

    it('should handle missing description paragraph', async () => {
      const markup = ` <html> <body> <main> <h1>Title Only</h1> <div>Some div content</div> </main> </body> </html> `;

      const extracted = await extractFrom(markup);

      expect(extracted.metadata.pattern?.name).toBe('Title Only');
      // No <p> follows the heading, so the description is expected to be empty.
      expect(extracted.metadata.pattern?.description).toBe('');
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cosmocoder/mcp-web-docs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

default-extractor.test.ts•6.81 KiB