MCP Web Docs

Overview Schema Related Servers Score Discussions

mcp-web-docs
src
processor

markdown.test.ts•8.66 KiB

import { processMarkdownContent, processExtractedContent } from './markdown.js'; import type { CrawlResult } from '../types.js'; describe('Markdown Processor', () => { describe('processMarkdownContent', () => { it('should process simple markdown content', async () => { const page: CrawlResult = { url: 'https://example.com/docs/guide.md', path: '/docs/guide.md', title: 'Guide', content: `# Introduction This is the introduction to our guide. ## Getting Started Here's how to get started with our library. ## Installation Run the following command: \`\`\`bash npm install our-library \`\`\` `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); // Title comes from page title or front matter, not H1 necessarily expect(result?.article.title).toBeTruthy(); expect(result?.article.url).toBe(page.url); expect(result?.article.path).toBe(page.path); expect(result?.article.components.length).toBeGreaterThan(0); }); it('should extract front matter', async () => { const page: CrawlResult = { url: 'https://example.com/docs/page.md', path: '/docs/page.md', title: 'Page', content: `--- title: Custom Title description: A custom description --- # Heading Content here. `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); expect(result?.article.title).toBe('Custom Title'); }); it('should preserve code blocks', async () => { const page: CrawlResult = { url: 'https://example.com/docs/code.md', path: '/docs/code.md', title: 'Code Examples', content: `# Code Examples Here is some JavaScript code: \`\`\`javascript function hello() { console.log('Hello, world!'); } \`\`\` And here is Python: \`\`\`python def hello(): print("Hello, world!") \`\`\` `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); expect(result?.content).toContain('```javascript'); expect(result?.content).toContain('console.log'); expect(result?.content).toContain('```python'); }); it('should handle markdown without headers', async () => { const page: CrawlResult = { url: 'https://example.com/docs/simple.md', path: '/docs/simple.md', title: 'Simple Page', content: `This is just a simple paragraph. Another paragraph here. And a final one. `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); expect(result?.article.components.length).toBeGreaterThan(0); }); it('should return undefined for empty content', async () => { const page: CrawlResult = { url: 'https://example.com/docs/empty.md', path: '/docs/empty.md', title: 'Empty', content: '', }; const result = await processMarkdownContent(page); expect(result).toBeUndefined(); }); it('should handle whitespace-only content', async () => { const page: CrawlResult = { url: 'https://example.com/docs/whitespace.md', path: '/docs/whitespace.md', title: 'Whitespace', content: ' \n\n \n ', }; const result = await processMarkdownContent(page); expect(result).toBeUndefined(); }); it('should handle nested headers', async () => { const page: CrawlResult = { url: 'https://example.com/docs/nested.md', path: '/docs/nested.md', title: 'Nested', content: `# Top Level Introduction text. ## Second Level Section content. ### Third Level Subsection content. #### Fourth Level Deep content. `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); expect(result?.article.components.length).toBeGreaterThanOrEqual(4); }); it('should detect Docusaurus-style headers with unicode markers', async () => { const page: CrawlResult = { url: 'https://example.com/docs/docusaurus.md', path: '/docs/docusaurus.md', title: 'Docusaurus Page', content: `# Main Title Introduction paragraph. Hooks\u200B This section covers hooks. Example\u200B Here is an example. `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); expect(result?.article.components.length).toBeGreaterThan(1); }); it('should handle special characters in content', async () => { const page: CrawlResult = { url: 'https://example.com/docs/special.md', path: '/docs/special.md', title: 'Special Characters', content: `# Special Characters Here are some special characters: < > & " ' \` * _ [] () {} ## Math Symbols α β γ δ ε × ÷ ± ≠ ≤ ≥ ## Emojis 🚀 📚 💡 ✅ ❌ `, }; const result = await processMarkdownContent(page); expect(result).toBeDefined(); expect(result?.content).toContain('<'); expect(result?.content).toContain('α'); expect(result?.content).toContain('🚀'); }); }); describe('processExtractedContent', () => { it('should process pre-extracted content', async () => { const page: CrawlResult = { url: 'https://example.com/component', path: '/component', title: 'Button Component', content: `# Button A versatile button component. ## Props | Prop | Type | Default | |------|------|---------| | variant | string | 'primary' | | disabled | boolean | false | ## Example \`\`\`jsx <Button variant="secondary">Click me</Button> \`\`\` `, extractorUsed: 'StorybookExtractor', }; const result = await processExtractedContent(page); expect(result).toBeDefined(); expect(result?.article.title).toBe('Button'); expect(result?.article.components.length).toBeGreaterThan(0); }); it('should handle content without sections', async () => { const page: CrawlResult = { url: 'https://example.com/simple', path: '/simple', title: 'Simple Page', content: `Just some plain text content without any headers or structure. This is another paragraph.`, extractorUsed: 'DefaultExtractor', }; const result = await processExtractedContent(page); expect(result).toBeDefined(); expect(result?.article.components.length).toBe(1); expect(result?.content).toContain('Just some plain text'); }); it('should return undefined for empty extracted content', async () => { const page: CrawlResult = { url: 'https://example.com/empty', path: '/empty', title: 'Empty', content: '', extractorUsed: 'StorybookExtractor', }; const result = await processExtractedContent(page); expect(result).toBeUndefined(); }); it('should preserve markdown formatting in extracted content', async () => { const page: CrawlResult = { url: 'https://example.com/formatted', path: '/formatted', title: 'Formatted', content: `# Formatted Content Here is **bold** and *italic* text. - List item 1 - List item 2 - List item 3 > A blockquote here And some \`inline code\` too. `, extractorUsed: 'GithubPagesExtractor', }; const result = await processExtractedContent(page); expect(result).toBeDefined(); expect(result?.content).toContain('**bold**'); expect(result?.content).toContain('*italic*'); expect(result?.content).toContain('- List item'); }); it('should use page title when no H1 found', async () => { const page: CrawlResult = { url: 'https://example.com/notitle', path: '/notitle', title: 'Page Title from Crawl', content: `## Only H2 Headers Some content here. ## Another Section More content. `, extractorUsed: 'StorybookExtractor', }; const result = await processExtractedContent(page); expect(result).toBeDefined(); expect(result?.article.title).toBe('Page Title from Crawl'); }); it('should handle very long content', async () => { const longContent = `# Long Document ${Array(100) .fill(null) .map((_, i) => `## Section ${i + 1}\n\nThis is the content for section ${i + 1}. Lorem ipsum dolor sit amet.`) .join('\n\n')} `; const page: CrawlResult = { url: 'https://example.com/long', path: '/long', title: 'Long Document', content: longContent, extractorUsed: 'DefaultExtractor', }; const result = await processExtractedContent(page); expect(result).toBeDefined(); expect(result?.article.components.length).toBeGreaterThan(50); }); }); });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cosmocoder/mcp-web-docs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

markdown.test.ts•8.66 KiB