Skip to main content
Glama
cbcoutinho

Nextcloud MCP Server

by cbcoutinho
html_processor.py1.61 kB
"""HTML to Markdown conversion utilities for vector sync.""" import logging from markdownify import markdownify as md logger = logging.getLogger(__name__) def html_to_markdown(html_content: str | None) -> str: """Convert HTML content to Markdown, preserving semantic structure. This function converts HTML (typically from RSS/Atom feed items) to Markdown for better text embedding. Markdown preserves: - Heading hierarchy (important for document structure) - Lists (bullet and numbered) - Links (as [text](url)) - Bold/italic emphasis - Paragraphs and line breaks Args: html_content: HTML string to convert (may be None or empty) Returns: Markdown string, or empty string if input is None/empty Example: >>> html_to_markdown("<h1>Title</h1><p>Content with <b>bold</b>.</p>") '# Title\\n\\nContent with **bold**.\\n\\n' """ if not html_content: return "" try: markdown = md( html_content, heading_style="ATX", # Use # style headings strip=["script", "style", "iframe", "noscript"], # Remove unsafe elements bullets="-", # Use - for unordered lists code_language="", # Don't add language hints to code blocks ) return markdown.strip() except Exception as e: logger.warning(f"Failed to convert HTML to Markdown: {e}") # Fallback: strip all HTML tags as a last resort import re text = re.sub(r"<[^>]+>", " ", html_content) return " ".join(text.split()) # Normalize whitespace

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cbcoutinho/nextcloud-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server