Markdown MCP Server

Overview Schema Related Servers Score Discussions

markdown-mcp

markdown-mcp.js•14.5 KiB

#!/usr/bin/env node

/**
 * Markdown MCP Server
 * Extracts clean markdown content from web pages using Playwright
 */

const { StdioServerTransport } = require('@modelcontextprotocol/sdk/server/stdio.js');
const { Server } = require('@modelcontextprotocol/sdk/server/index.js');
const {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} = require('@modelcontextprotocol/sdk/types.js');
const { chromium } = require('playwright');

/** Default navigation timeout (ms) advertised in the tool schema. */
const DEFAULT_NAVIGATION_TIMEOUT_MS = 30000;
/** How long to wait (ms) for a caller-supplied `waitForSelector`. */
const SELECTOR_TIMEOUT_MS = 10000;
/** Fixed settle delay (ms) used when no selector is given, for JS-heavy sites. */
const SETTLE_DELAY_MS = 5000;

/**
 * Build an MCP tool result that reports a failure to the client.
 *
 * @param {string} message - Human-readable failure reason.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function errorResult(message) {
  return {
    content: [
      {
        type: 'text',
        text: `Error extracting markdown: ${message}`,
      },
    ],
    isError: true,
  };
}

/**
 * Convert the current document's main content to markdown.
 *
 * This function is handed to `page.evaluate`, so it executes inside the
 * browser page. It must stay fully self-contained: it may only use its
 * argument and browser globals, never anything from this module's scope.
 *
 * @param {{includeImages: boolean, includeLinks: boolean}} options
 * @returns {string} Markdown text (possibly empty).
 */
function extractMarkdownInPage({ includeImages, includeLinks }) {
  // Confluence-specific selectors first, then general ones
  const MAIN_SELECTORS = [
    '#main-content',
    '.wiki-content',
    '[data-test-id="wiki-content"]',
    'main[role="main"]',
    'main',
    'article',
    '[role="main"]',
    '.main-content',
    '.content',
    '#content',
    '.post-content',
    '.article-content',
    'body',
  ];
  // Content elements that are never skipped
  const CONTENT_TAGS = [
    'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'li', 'table', 'pre', 'code', 'blockquote',
  ];
  const TECHNICAL_TAGS = ['script', 'style', 'noscript', 'iframe'];
  const SKIPPED_ROLES = ['navigation', 'banner', 'contentinfo', 'complementary'];
  const LAYOUT_TAGS = ['nav', 'header', 'footer', 'aside'];
  const STRICT_SKIP_PATTERNS = ['cookie-banner', 'gdpr', 'advertisement', 'sponsored'];
  const BLOCK_CONTAINERS = ['div', 'section', 'article', 'main', 'td', 'th', 'li'];
  const FENCE = '```';

  // Text-node renderers used by convertChildren().
  const keepText = (text) => text;
  const spacedText = (text) => {
    const trimmed = text.trim();
    return trimmed ? trimmed + ' ' : '';
  };
  const collapsedText = (text) => text.replace(/\s+/g, ' ');

  /** Pick the first candidate container holding more than 100 chars of text. */
  function extractMainContent() {
    for (const selector of MAIN_SELECTORS) {
      const element = document.querySelector(selector);
      if (element && element.textContent.trim().length > 100) {
        return element;
      }
    }
    return document.body;
  }

  /** Decide whether an element (and its subtree) is excluded from the output. */
  function shouldSkipElement(element) {
    if (!element || !element.tagName) return true;

    const tagName = element.tagName.toLowerCase();
    if (CONTENT_TAGS.includes(tagName)) return false;
    if (TECHNICAL_TAGS.includes(tagName)) return true;

    // offsetParent is also null for e.g. fixed-position elements, so confirm
    // with the computed style before treating the element as hidden.
    if (element.offsetParent === null) {
      const style = window.getComputedStyle(element);
      if (style.display === 'none' || style.visibility === 'hidden') {
        return true;
      }
    }

    if (SKIPPED_ROLES.includes(element.getAttribute('role'))) return true;
    if (LAYOUT_TAGS.includes(tagName)) return true;

    // Read the attributes rather than the `className` / `id` properties: on a
    // <form>, a control named "id" shadows the property with an element.
    const combined = (
      (element.getAttribute('class') || '') +
      ' ' +
      (element.getAttribute('id') || '')
    ).toLowerCase();
    return STRICT_SKIP_PATTERNS.some((pattern) => combined.includes(pattern));
  }

  /** Plain text of a subtree, honouring skip rules; <br> becomes a newline. */
  function getTextContent(node) {
    let text = '';
    for (const child of node.childNodes) {
      if (child.nodeType === Node.TEXT_NODE) {
        text += child.textContent;
      } else if (child.nodeType === Node.ELEMENT_NODE) {
        if (child.tagName.toLowerCase() === 'br') {
          text += '\n';
        } else if (!shouldSkipElement(child)) {
          text += getTextContent(child);
        }
      }
    }
    return text;
  }

  /** Single-line text of a subtree (all whitespace runs collapsed). */
  function inlineText(node) {
    return getTextContent(node).replace(/\s+/g, ' ').trim();
  }

  /**
   * Render every child of `node`: elements through convertToMarkdown, text
   * nodes through `renderText`.
   */
  function convertChildren(node, inList, renderText) {
    let out = '';
    for (const child of node.childNodes) {
      if (child.nodeType === Node.TEXT_NODE) {
        out += renderText(child.textContent);
      } else if (child.nodeType === Node.ELEMENT_NODE) {
        out += convertToMarkdown(child, inList);
      }
    }
    return out;
  }

  /** Recursively render one element as markdown. */
  function convertToMarkdown(node, inList = false) {
    if (!node || shouldSkipElement(node)) return '';

    // shouldSkipElement() already rejected nodes without a tagName.
    const tagName = node.tagName.toLowerCase();

    // Headings
    if (/^h[1-6]$/.test(tagName)) {
      const level = Number.parseInt(tagName[1], 10);
      const text = inlineText(node);
      return text ? '\n' + '#'.repeat(level) + ' ' + text + '\n\n' : '';
    }

    // Paragraphs
    if (tagName === 'p') {
      const text = convertChildren(node, false, keepText).trim();
      return text ? text + '\n\n' : '';
    }

    // Code blocks
    if (tagName === 'pre') {
      const code = node.querySelector('code');
      const text = (code || node).textContent.trim();
      if (!text) return '';
      const language = code?.className.match(/language-(\w+)/)?.[1] || '';
      return '\n' + FENCE + language + '\n' + text + '\n' + FENCE + '\n\n';
    }

    // Inline code
    if (tagName === 'code' && node.parentElement?.tagName !== 'PRE') {
      const text = node.textContent.trim();
      return text ? '`' + text + '`' : '';
    }

    // Blockquotes
    if (tagName === 'blockquote') {
      const text = getTextContent(node).trim();
      if (!text) return '';
      const lines = text.split('\n').filter((line) => line.trim());
      return '\n' + lines.map((line) => '> ' + line.trim()).join('\n') + '\n\n';
    }

    // Lists
    if (tagName === 'ul' || tagName === 'ol') {
      let list = '';
      let emitted = 0;
      for (const li of node.children) {
        if (li.tagName !== 'LI') continue;
        const text = convertChildren(li, true, keepText).trim();
        if (!text) continue;
        // Number only the items actually written so empty <li>s leave no gaps.
        emitted += 1;
        list += (tagName === 'ol' ? `${emitted}. ` : '- ') + text + '\n';
      }
      if (!list) return '';
      // A nested list starts on its own line instead of being glued to the
      // parent item's text; a top-level list is followed by a blank line.
      return inList ? '\n' + list : list + '\n';
    }

    // Images
    if (tagName === 'img' && includeImages) {
      const alt = node.getAttribute('alt') || '';
      const src = node.getAttribute('src') || node.getAttribute('data-src') || '';
      if (!src) return '';
      try {
        const fullSrc = new URL(src, window.location.href).href;
        return `![${alt}](${fullSrc})\n\n`;
      } catch {
        // Invalid URL, skip
        return '';
      }
    }

    // Links
    if (tagName === 'a') {
      const text = inlineText(node);
      if (!text) {
        // No link text (e.g. a linked image): still render the children.
        return convertChildren(node, inList, collapsedText);
      }
      // With links disabled keep the anchor's text, just not the URL.
      if (!includeLinks) return text;
      const href = node.getAttribute('href');
      if (!href) return text;
      try {
        const fullHref = new URL(href, window.location.href).href;
        return `[${text}](${fullHref})`;
      } catch {
        return text;
      }
    }

    // Strong/Bold
    if (tagName === 'strong' || tagName === 'b') {
      const text = inlineText(node);
      return text ? `**${text}**` : '';
    }

    // Emphasis/Italic
    if (tagName === 'em' || tagName === 'i') {
      const text = inlineText(node);
      return text ? `*${text}*` : '';
    }

    // Horizontal rule
    if (tagName === 'hr') {
      return '\n---\n\n';
    }

    // Tables
    if (tagName === 'table') {
      let table = '';
      let hasHeaderRule = false;
      for (const row of node.querySelectorAll('tr')) {
        const cells = Array.from(row.querySelectorAll('th, td'));
        // A literal pipe would otherwise be read as a column boundary.
        const cellTexts = cells.map((cell) => inlineText(cell).replace(/\|/g, '\\|'));
        if (!cellTexts.some(Boolean)) continue;
        table += '| ' + cellTexts.join(' | ') + ' |\n';
        if (!hasHeaderRule) {
          // The separator follows the first row that is actually written,
          // even when the table's first <tr> is empty.
          table += '| ' + cells.map(() => '---').join(' | ') + ' |\n';
          hasHeaderRule = true;
        }
      }
      return table ? table + '\n' : '';
    }

    // Line break
    if (tagName === 'br') {
      return '\n';
    }

    // Block containers - process children, keeping direct text at any depth
    if (BLOCK_CONTAINERS.includes(tagName)) {
      return convertChildren(node, inList, spacedText);
    }

    // <span> and any other element: treat as inline and keep its text
    return convertChildren(node, inList, collapsedText);
  }

  /**
   * Collapse repeated spaces and blank lines, leaving fenced code blocks
   * untouched so their indentation survives.
   */
  function tidyMarkdown(raw) {
    // The capture group makes split() keep the fences, at the odd indices.
    return raw
      .split(/(```[^\n]*\n[\s\S]*?\n```)/)
      .map((segment, index) =>
        index % 2 === 1
          ? segment
          : segment.replace(/ +/g, ' ').replace(/\n\n\n+/g, '\n\n')
      )
      .join('')
      .trim();
  }

  const mainContent = extractMainContent();
  let result = tidyMarkdown(convertToMarkdown(mainContent));

  // If still empty, use fallback
  if (!result || result.length < 50) {
    const allText = mainContent.textContent.trim();
    if (allText) {
      result = allText
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .join('\n\n');
    }
  }

  return result;
}

/**
 * MCP server exposing a single tool, `get_page_markdown`, which loads a URL
 * in headless Chromium and returns the page's main content as markdown.
 */
class MarkdownMCPServer {
  constructor() {
    this.server = new Server(
      {
        name: 'markdown-mcp',
        version: '1.0.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.browser = null;
    // In-flight chromium.launch() promise, shared by concurrent callers.
    this.browserLaunch = null;
    this.setupHandlers();
  }

  /** Register the tool-listing and tool-call request handlers. */
  setupHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'get_page_markdown',
          description:
            'Extract clean markdown content from a URL. Returns only the main content without navigation, headers, footers, or sidebars.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to extract markdown from',
              },
              includeImages: {
                type: 'boolean',
                description: 'Whether to include image references in markdown (default: true)',
                default: true,
              },
              includeLinks: {
                type: 'boolean',
                description: 'Whether to include hyperlinks in markdown (default: true)',
                default: true,
              },
              waitForSelector: {
                type: 'string',
                description: 'Optional CSS selector to wait for before extracting content',
              },
              timeout: {
                type: 'number',
                description: 'Navigation timeout in milliseconds (default: 30000)',
                default: DEFAULT_NAVIGATION_TIMEOUT_MS,
              },
            },
            required: ['url'],
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (request.params.name === 'get_page_markdown') {
        return await this.getPageMarkdown(request.params.arguments);
      }
      throw new Error(`Unknown tool: ${request.params.name}`);
    });
  }

  /**
   * Return the shared headless browser, launching it on first use.
   *
   * Concurrent callers share one launch promise, so overlapping tool calls
   * cannot start (and leak) a second Chromium. A browser that has
   * disconnected is discarded and relaunched.
   *
   * @returns {Promise<import('playwright').Browser>}
   */
  async ensureBrowser() {
    if (this.browser?.isConnected()) {
      return this.browser;
    }
    this.browser = null;

    if (!this.browserLaunch) {
      this.browserLaunch = chromium
        .launch({ headless: true })
        .then((browser) => {
          this.browser = browser;
          return browser;
        })
        .finally(() => {
          // Cleared on success and failure so a failed launch can be retried.
          this.browserLaunch = null;
        });
    }
    return this.browserLaunch;
  }

  /**
   * Load `url` and return its main content as markdown.
   *
   * Failures (bad arguments, launch/navigation/extraction errors) are
   * reported as a tool result with `isError: true` rather than thrown.
   *
   * @param {object} [args] - Tool arguments.
   * @param {string} args.url - Absolute URL to load.
   * @param {boolean} [args.includeImages=true] - Emit image references.
   * @param {boolean} [args.includeLinks=true] - Emit hyperlinks.
   * @param {string} [args.waitForSelector] - CSS selector to wait for before extracting.
   * @param {number} [args.timeout=30000] - Navigation timeout in milliseconds.
   * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
   */
  async getPageMarkdown(args) {
    const {
      url,
      includeImages = true,
      includeLinks = true,
      waitForSelector,
      timeout = DEFAULT_NAVIGATION_TIMEOUT_MS,
    } = args ?? {};

    // Reject unusable input before paying for a browser context.
    // NOTE(review): the URL scheme is not restricted here, so file:// and
    // similar are still passed to the browser - confirm whether only
    // http(s) should be allowed.
    if (typeof url !== 'string' || url.trim() === '') {
      return errorResult('"url" is required and must be a non-empty string');
    }
    try {
      new URL(url);
    } catch {
      return errorResult(`"${url}" is not a valid absolute URL`);
    }

    let context = null;
    try {
      const browser = await this.ensureBrowser();
      context = await browser.newContext();
      const page = await context.newPage();

      await page.goto(url, { waitUntil: 'domcontentloaded', timeout });

      if (waitForSelector) {
        await page.waitForSelector(waitForSelector, { timeout: SELECTOR_TIMEOUT_MS });
      } else {
        // Wait for content to load - especially important for JS-heavy sites
        await page.waitForTimeout(SETTLE_DELAY_MS);
      }

      const markdown = await page.evaluate(extractMarkdownInPage, {
        includeImages,
        includeLinks,
      });

      return {
        content: [
          {
            type: 'text',
            text: markdown || 'No content could be extracted from this page.',
          },
        ],
      };
    } catch (error) {
      return errorResult(error?.message ?? String(error));
    } finally {
      if (context) {
        try {
          await context.close();
        } catch (closeError) {
          // Best-effort cleanup: a failed close must not replace the result.
          // stderr is safe to log to; stdout carries the MCP protocol.
          console.error('Failed to close browser context:', closeError);
        }
      }
    }
  }

  /** Connect to the stdio transport and install shutdown handlers. */
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);

    const shutdown = async () => {
      try {
        if (this.browser) {
          await this.browser.close();
        }
      } catch (error) {
        console.error('Failed to close browser during shutdown:', error);
      } finally {
        // Always exit, even when closing the browser failed.
        process.exit(0);
      }
    };

    process.once('SIGINT', shutdown);
    process.once('SIGTERM', shutdown);
  }
}

const server = new MarkdownMCPServer();
server.run().catch((error) => {
  console.error(error);
  // Signal the startup failure to the parent process.
  process.exitCode = 1;
});

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vishwajeetdabholkar/markdown-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

markdown-mcp.js•14.5 KiB