Spider MCP Server

Overview Schema Related Servers Score Discussions

parser.test.ts•4.17 KiB

import { describe, it, expect } from 'bun:test';
import { HtmlParser } from '@/spider/parser.js';

/**
 * Test suite for HtmlParser.
 *
 * Every case builds a new HtmlParser for the shared base URL, parses a small
 * HTML fixture, and checks one aspect of the parse result: title/content,
 * metadata, or the extracted link list.
 */
describe('html parser', () => {
  // Base URL handed to every parser instance; the link tests expect
  // root-relative hrefs to come back prefixed with this origin.
  const baseUrl = 'https://example.com';

  describe('content extraction', () => {
    it('should extract title from h1', () => {
      const markup = '<html><body><h1>Test Title</h1>Content</body></html>';
      const { title } = new HtmlParser(baseUrl).parse(markup);

      expect(title).toBe('Test Title');
    });

    it('should extract title from title tag', () => {
      const markup = '<html><head><title>Page Title</title></head><body>Content</body></html>';
      const { title } = new HtmlParser(baseUrl).parse(markup);

      expect(title).toBe('Page Title');
    });

    it('should extract content from body', () => {
      const markup = '<html><body>This is content</body></html>';
      const { content } = new HtmlParser(baseUrl).parse(markup);

      expect(content).toContain('This is content');
    });

    it('should default to "Untitled" when no title found', () => {
      // Fixture has neither an <h1> nor a <title> element.
      const markup = '<html><body>Content only</body></html>';
      const { title } = new HtmlParser(baseUrl).parse(markup);

      expect(title).toBe('Untitled');
    });
  });

  describe('metadata extraction', () => {
    it('should extract meta description', () => {
      const markup = ` <html> <head> <meta name="description" content="Test description"> </head> <body>Content</body> </html> `;
      const { metadata } = new HtmlParser(baseUrl).parse(markup);

      expect(metadata.description).toBe('Test description');
    });

    it('should extract meta keywords', () => {
      const markup = ` <html> <head> <meta name="keywords" content="test, keywords, html"> </head> <body>Content</body> </html> `;
      const { metadata } = new HtmlParser(baseUrl).parse(markup);

      // The comma-separated attribute value is expected as an array of
      // individual keywords without the surrounding spaces.
      expect(metadata.keywords).toEqual(['test', 'keywords', 'html']);
    });

    it('should extract language', () => {
      const markup = '<html lang="en"><body>Content</body></html>';
      const { metadata } = new HtmlParser(baseUrl).parse(markup);

      expect(metadata.language).toBe('en');
    });

    it('should count words', () => {
      const markup = '<html><body>one two three four five</body></html>';
      const { metadata } = new HtmlParser(baseUrl).parse(markup);

      expect(metadata.wordCount).toBe(5);
    });
  });

  describe('link extraction', () => {
    it('should extract absolute links', () => {
      const markup = ` <html> <body> <a href="https://other.com/page">External</a> <a href="/internal">Internal</a> Content </body> </html> `;
      const { links } = new HtmlParser(baseUrl).parse(markup);

      // The external href is expected unchanged; the root-relative href is
      // expected as an absolute URL on the base origin.
      expect(links).toContain('https://other.com/page');
      expect(links).toContain('https://example.com/internal');
    });

    it('should deduplicate links', () => {
      // Two anchors share the same href, so only one entry is expected.
      const markup = ` <html> <body> <a href="/page">Link 1</a> <a href="/page">Link 2</a> Content </body> </html> `;
      const { links } = new HtmlParser(baseUrl).parse(markup);

      expect(links.length).toBe(1);
      expect(links[0]).toBe('https://example.com/page');
    });

    it('should ignore invalid links', () => {
      // The empty href and the javascript: href are expected to be dropped.
      const markup = ` <html> <body> <a href="">Empty</a> <a href="javascript:void(0)">JS</a> <a href="/valid">Valid</a> Content </body> </html> `;
      const { links } = new HtmlParser(baseUrl).parse(markup);

      expect(links).toEqual(['https://example.com/valid']);
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/oeo/spider-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

parser.test.ts•4.17 KiB