Skip to main content
Glama

Markdown MCP Server

markdown-mcp.js • 14.9 kB
#!/usr/bin/env node

/**
 * Markdown MCP Server
 * Extracts clean markdown content from web pages using Playwright
 */

const { StdioServerTransport } = require('@modelcontextprotocol/sdk/server/stdio.js');
const { Server } = require('@modelcontextprotocol/sdk/server/index.js');
const {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} = require('@modelcontextprotocol/sdk/types.js');
const { chromium } = require('playwright');

// Default navigation timeout passed to page.goto when the caller gives none.
const DEFAULT_NAVIGATION_TIMEOUT_MS = 30000;
// Upper bound on waiting for the caller-supplied `waitForSelector`.
const SELECTOR_WAIT_MS = 10000;
// Fixed pause used when no selector is given, so JS-heavy pages can render.
const SETTLE_DELAY_MS = 5000;

/**
 * Converts the current document's main content to markdown.
 *
 * This function is handed to `page.evaluate`, so it executes inside the
 * browser page. It must stay fully self-contained: it may only use its
 * argument and browser globals (`document`, `window`, `Node`, `URL`).
 *
 * @param {{ includeImages: boolean, includeLinks: boolean }} options
 *   Whether image references and hyperlinks are emitted.
 * @returns {string} Markdown text, or a plain-text fallback when the
 *   converted markdown is shorter than 50 characters.
 */
function extractMarkdownInPage({ includeImages, includeLinks }) {
  /** Picks the first candidate container holding more than 100 characters of text. */
  function extractMainContent() {
    // Confluence-specific selectors first, then general ones
    const mainSelectors = [
      '#main-content',
      '.wiki-content',
      '[data-test-id="wiki-content"]',
      'main[role="main"]',
      'main',
      'article',
      '[role="main"]',
      '.main-content',
      '.content',
      '#content',
      '.post-content',
      '.article-content',
      'body',
    ];

    for (const selector of mainSelectors) {
      const element = document.querySelector(selector);
      if (element && element.textContent.trim().length > 100) {
        return element;
      }
    }

    return document.body;
  }

  /** Returns true for elements that must not contribute to the output. */
  function shouldSkipElement(element) {
    if (!element || !element.tagName) return true;

    const tagName = element.tagName.toLowerCase();

    // Never skip these content elements
    const alwaysKept = [
      'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      'ul', 'ol', 'li', 'table', 'pre', 'code', 'blockquote',
    ];
    if (alwaysKept.includes(tagName)) {
      return false;
    }

    // Check for hidden elements
    if (element.offsetParent === null && tagName !== 'script' && tagName !== 'style') {
      const style = window.getComputedStyle(element);
      if (style.display === 'none' || style.visibility === 'hidden') {
        return true;
      }
    }

    // Skip technical elements
    if (['script', 'style', 'noscript', 'iframe'].includes(tagName)) {
      return true;
    }

    // Check role attributes
    const role = element.getAttribute('role');
    if (['navigation', 'banner', 'contentinfo', 'complementary'].includes(role)) {
      return true;
    }

    // Check specific element types
    if (tagName === 'nav' || tagName === 'header' || tagName === 'footer' || tagName === 'aside') {
      return true;
    }

    // Check class and id for common patterns (but be less aggressive)
    const className = (element.className || '').toString().toLowerCase();
    const id = (element.id || '').toLowerCase();
    const combined = className + ' ' + id;

    const strictSkipPatterns = ['cookie-banner', 'gdpr', 'advertisement', 'sponsored'];
    return strictSkipPatterns.some((pattern) => combined.includes(pattern));
  }

  /** Collects the plain text of a node, turning <br> into newlines and omitting skipped elements. */
  function getTextContent(node) {
    let text = '';
    for (const child of node.childNodes) {
      if (child.nodeType === Node.TEXT_NODE) {
        text += child.textContent;
      } else if (child.nodeType === Node.ELEMENT_NODE) {
        const tag = child.tagName.toLowerCase();
        if (tag === 'br') {
          text += '\n';
        } else if (!shouldSkipElement(child)) {
          text += getTextContent(child);
        }
      }
    }
    return text;
  }

  /**
   * Renders the children of an inline-flow parent (paragraph, list item,
   * anchor): text nodes are kept verbatim and elements are converted.
   *
   * <span> children are walked here instead of being sent to
   * convertToMarkdown, because its container branch only keeps text nodes
   * at depth 0 and would otherwise drop text wrapped in a span.
   */
  function renderInlineChildren(node, depth, inList) {
    let out = '';
    for (const child of node.childNodes) {
      if (child.nodeType === Node.TEXT_NODE) {
        out += child.textContent;
      } else if (child.nodeType === Node.ELEMENT_NODE) {
        if (child.tagName.toLowerCase() === 'span') {
          if (!shouldSkipElement(child)) {
            out += renderInlineChildren(child, depth + 1, inList);
          }
        } else {
          out += convertToMarkdown(child, depth + 1, inList);
        }
      }
    }
    return out;
  }

  /** Recursively converts an element and its descendants to markdown. */
  function convertToMarkdown(node, depth = 0, inList = false) {
    if (!node || shouldSkipElement(node)) return '';

    let markdown = '';
    // shouldSkipElement has already rejected nodes without a tagName.
    const tagName = node.tagName.toLowerCase();

    // Headings
    if (/^h[1-6]$/.test(tagName)) {
      const level = Number.parseInt(tagName[1], 10);
      const text = getTextContent(node).trim();
      if (text) {
        markdown += '\n' + '#'.repeat(level) + ' ' + text + '\n\n';
      }
      return markdown;
    }

    // Paragraphs
    if (tagName === 'p') {
      const text = renderInlineChildren(node, depth, false).trim();
      if (text) {
        markdown += text + '\n\n';
      }
      return markdown;
    }

    // Code blocks
    if (tagName === 'pre') {
      const code = node.querySelector('code');
      const text = (code || node).textContent.trim();
      if (text) {
        const language = code?.className.match(/language-(\w+)/)?.[1] || '';
        markdown += '\n```' + language + '\n' + text + '\n```\n\n';
      }
      return markdown;
    }

    // Inline code
    if (tagName === 'code' && node.parentElement?.tagName !== 'PRE') {
      return '`' + node.textContent.trim() + '`';
    }

    // Blockquotes
    if (tagName === 'blockquote') {
      const text = getTextContent(node).trim();
      if (text) {
        const lines = text.split('\n').filter((l) => l.trim());
        markdown += '\n' + lines.map((line) => '> ' + line.trim()).join('\n') + '\n\n';
      }
      return markdown;
    }

    // Lists
    if (tagName === 'ul' || tagName === 'ol') {
      const items = Array.from(node.children).filter((child) => child.tagName === 'LI');
      items.forEach((li, idx) => {
        const prefix = tagName === 'ol' ? `${idx + 1}. ` : '- ';
        const text = renderInlineChildren(li, depth, true).trim();
        if (text) {
          markdown += prefix + text + '\n';
        }
      });
      if (!inList) markdown += '\n';
      return markdown;
    }

    // Images
    if (tagName === 'img' && includeImages) {
      const alt = node.getAttribute('alt') || '';
      const src = node.getAttribute('src') || node.getAttribute('data-src') || '';
      if (src) {
        try {
          const fullSrc = new URL(src, window.location.href).href;
          markdown += `![${alt}](${fullSrc})\n\n`;
        } catch (e) {
          // Invalid URL, skip
        }
      }
      return markdown;
    }

    // Links
    if (tagName === 'a') {
      if (!includeLinks) {
        // Drop only the hyperlink itself; the anchor's own content
        // (text, images, emphasis) still belongs in the output.
        return renderInlineChildren(node, depth, inList).trim();
      }
      const text = getTextContent(node).trim();
      const href = node.getAttribute('href');
      if (text && href) {
        try {
          const fullHref = new URL(href, window.location.href).href;
          return `[${text}](${fullHref})`;
        } catch (e) {
          return text;
        }
      }
      return text || '';
    }

    // Strong/Bold
    if (tagName === 'strong' || tagName === 'b') {
      const text = getTextContent(node).trim();
      return text ? `**${text}**` : '';
    }

    // Emphasis/Italic
    if (tagName === 'em' || tagName === 'i') {
      const text = getTextContent(node).trim();
      return text ? `*${text}*` : '';
    }

    // Horizontal rule
    if (tagName === 'hr') {
      return '\n---\n\n';
    }

    // Tables
    if (tagName === 'table') {
      const rows = Array.from(node.querySelectorAll('tr'));
      // The separator must follow the first row actually written, which is
      // not necessarily row index 0 when leading rows are empty.
      let headerWritten = false;
      for (const row of rows) {
        const cells = Array.from(row.querySelectorAll('th, td'));
        const cellTexts = cells.map((cell) =>
          getTextContent(cell)
            .trim()
            .replace(/\n/g, ' ')
            // An unescaped pipe would be read as a column boundary.
            .replace(/\|/g, '\\|')
        );
        if (!cellTexts.some((t) => t)) continue;

        markdown += '| ' + cellTexts.join(' | ') + ' |\n';
        if (!headerWritten) {
          markdown += '| ' + cells.map(() => '---').join(' | ') + ' |\n';
          headerWritten = true;
        }
      }
      if (rows.length > 0) {
        markdown += '\n';
      }
      return markdown;
    }

    // Line break
    if (tagName === 'br') {
      return '\n';
    }

    // Container elements - process children
    if (['div', 'section', 'article', 'main', 'span', 'td', 'th', 'li'].includes(tagName)) {
      for (const child of node.childNodes) {
        if (child.nodeType === Node.ELEMENT_NODE) {
          markdown += convertToMarkdown(child, depth + 1, inList);
        } else if (child.nodeType === Node.TEXT_NODE && depth === 0 && !inList) {
          const text = child.textContent.trim();
          if (text && text.length > 0) {
            markdown += text + ' ';
          }
        }
      }
      return markdown;
    }

    // For any other element, try to extract text from children
    if (node.childNodes && node.childNodes.length > 0) {
      for (const child of node.childNodes) {
        if (child.nodeType === Node.ELEMENT_NODE) {
          markdown += convertToMarkdown(child, depth + 1, inList);
        }
      }
    }

    return markdown;
  }

  /**
   * Collapses runs of spaces and blank lines, leaving fenced code blocks
   * untouched so that code indentation survives.
   */
  function tidyWhitespace(text) {
    return text
      // The capture group keeps the fences in the result at odd indices.
      .split(/(```[\s\S]*?```)/)
      .map((segment, index) =>
        index % 2 === 1
          ? segment
          : segment
              .replace(/ +/g, ' ') // Multiple spaces to single
              .replace(/\n\n\n+/g, '\n\n') // Multiple newlines to double
      )
      .join('')
      .trim();
  }

  const mainContent = extractMainContent();
  let result = tidyWhitespace(convertToMarkdown(mainContent));

  // If still empty, use fallback
  if (!result || result.length < 50) {
    const allText = mainContent.textContent.trim();
    if (allText) {
      result = allText
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .join('\n\n');
    }
  }

  return result;
}

/**
 * MCP server exposing a single tool, `get_page_markdown`, over stdio.
 * A headless Chromium instance is launched on first use and reused
 * across calls.
 */
class MarkdownMCPServer {
  constructor() {
    this.server = new Server(
      {
        name: 'markdown-mcp',
        version: '1.0.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.browser = null;
    // In-flight launch promise, shared so concurrent calls start one browser.
    this.browserLaunch = null;
    this.setupHandlers();
  }

  /** Registers the tool-listing and tool-call request handlers. */
  setupHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'get_page_markdown',
          description:
            'Extract clean markdown content from a URL. Returns only the main content without navigation, headers, footers, or sidebars.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to extract markdown from',
              },
              includeImages: {
                type: 'boolean',
                description: 'Whether to include image references in markdown (default: true)',
                default: true,
              },
              includeLinks: {
                type: 'boolean',
                description: 'Whether to include hyperlinks in markdown (default: true)',
                default: true,
              },
              waitForSelector: {
                type: 'string',
                description: 'Optional CSS selector to wait for before extracting content',
              },
              timeout: {
                type: 'number',
                description: 'Navigation timeout in milliseconds (default: 30000)',
                default: 30000,
              },
            },
            required: ['url'],
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (request.params.name === 'get_page_markdown') {
        return await this.getPageMarkdown(request.params.arguments);
      }
      throw new Error(`Unknown tool: ${request.params.name}`);
    });
  }

  /**
   * Returns the shared browser, launching it when there is none or when
   * the previous one is no longer connected.
   *
   * @returns {Promise<import('playwright').Browser>}
   */
  async ensureBrowser() {
    if (this.browser && this.browser.isConnected()) {
      return this.browser;
    }

    if (!this.browserLaunch) {
      this.browserLaunch = chromium
        .launch({ headless: true })
        .then((browser) => {
          this.browser = browser;
          return browser;
        })
        .finally(() => {
          // Cleared on success and failure so a failed launch can be retried.
          this.browserLaunch = null;
        });
    }

    return this.browserLaunch;
  }

  /**
   * Loads a URL in a fresh browser context and returns its markdown.
   *
   * @param {object} [args] Tool arguments.
   * @param {string} args.url Page to load.
   * @param {boolean} [args.includeImages=true] Emit image references.
   * @param {boolean} [args.includeLinks=true] Emit hyperlinks.
   * @param {string} [args.waitForSelector] CSS selector to wait for before extracting.
   * @param {number} [args.timeout=30000] Navigation timeout in milliseconds.
   * @returns {Promise<object>} MCP tool result; failures are reported as a
   *   text result with `isError: true` rather than thrown.
   */
  async getPageMarkdown(args) {
    const {
      url,
      includeImages = true,
      includeLinks = true,
      waitForSelector,
      timeout = DEFAULT_NAVIGATION_TIMEOUT_MS,
    } = args ?? {};

    if (typeof url !== 'string' || url.trim() === '') {
      return {
        content: [
          {
            type: 'text',
            text: 'Error extracting markdown: "url" must be a non-empty string',
          },
        ],
        isError: true,
      };
    }

    let context = null;
    try {
      const browser = await this.ensureBrowser();
      context = await browser.newContext();
      const page = await context.newPage();

      await page.goto(url, { waitUntil: 'domcontentloaded', timeout });

      if (waitForSelector) {
        await page.waitForSelector(waitForSelector, { timeout: SELECTOR_WAIT_MS });
      } else {
        // Wait for content to load - especially important for JS-heavy sites
        await page.waitForTimeout(SETTLE_DELAY_MS);
      }

      const markdown = await page.evaluate(extractMarkdownInPage, {
        includeImages,
        includeLinks,
      });

      return {
        content: [
          {
            type: 'text',
            text: markdown || 'No content could be extracted from this page.',
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Error extracting markdown: ${error.message}`,
          },
        ],
        isError: true,
      };
    } finally {
      // Single cleanup path: also covers failures between newContext() and
      // the first page operation.
      if (context) {
        try {
          await context.close();
        } catch (closeError) {
          // stderr only: stdout is used by the stdio transport.
          console.error('Failed to close browser context:', closeError);
        }
      }
    }
  }

  /** Connects the server to stdio and installs shutdown handlers. */
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);

    const shutdown = async () => {
      try {
        if (this.browser) {
          await this.browser.close();
        }
      } catch (error) {
        console.error('Failed to close browser during shutdown:', error);
      } finally {
        // Exit even when closing the browser fails.
        process.exit(0);
      }
    };

    process.on('SIGINT', shutdown);
    process.on('SIGTERM', shutdown);
  }
}

const server = new MarkdownMCPServer();
server.run().catch(console.error);

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vishwajeetdabholkar/markdown-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.