We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/sophia-labs/mnemosyne-mcp'
If you have feedback or need assistance with the MCP directory API, please join our Discord server
"""Convert Markdown to TipTap XML.
Pure function: markdown_to_tiptap_xml(md_str) -> xml_str
Uses mistune 3.x in AST mode to parse markdown, then walks the AST to
emit TipTap XML. No HTML intermediate step.
Handles: headings, paragraphs, lists (bullet/ordered/task with nesting),
code blocks, blockquotes, horizontal rules, and inline marks (bold, italic,
strikethrough, inline code, links). Footnotes are converted to TipTap's
self-closing <footnote/> elements.
"""
from __future__ import annotations
import html
from typing import Any, Dict, List, Optional
import mistune
# Singleton parser with plugins enabled
_md_parser = mistune.create_markdown(
renderer="ast",
plugins=["strikethrough", "task_lists", "footnotes", "table"],
)
def markdown_to_tiptap_xml(md_str: str) -> str:
"""Convert a Markdown string to TipTap XML.
Args:
md_str: Markdown content.
Returns:
TipTap XML string suitable for write_document / update_block.
"""
if not md_str or not md_str.strip():
return ""
ast = _md_parser(md_str)
if not ast:
return ""
# Collect footnote definitions for reference
footnotes = _collect_footnotes(ast)
parts: list[str] = []
for node in ast:
xml = _convert_block(node, footnotes)
if xml:
parts.append(xml)
return "".join(parts)
# ---------------------------------------------------------------------------
# Footnote collection
# ---------------------------------------------------------------------------
def _collect_footnotes(ast: list[dict]) -> dict[str, str]:
"""Extract footnote definitions from the AST into a key→content map."""
footnotes: dict[str, str] = {}
for node in ast:
if node.get("type") == "footnotes":
for item in node.get("children", []):
if item.get("type") == "footnote_item":
key = str(item.get("attrs", {}).get("key", ""))
# Render footnote body as plain text
content_parts: list[str] = []
for child in item.get("children", []):
content_parts.append(_plain_text_from_node(child))
footnotes[key] = " ".join(content_parts).strip()
return footnotes
def _plain_text_from_node(node: dict) -> str:
"""Extract plain text from an AST node, ignoring formatting."""
ntype = node.get("type", "")
if ntype == "text":
return node.get("raw", "")
if ntype in ("softbreak", "linebreak"):
return " "
children = node.get("children", [])
return "".join(_plain_text_from_node(c) for c in children)
# ---------------------------------------------------------------------------
# Block-level conversion
# ---------------------------------------------------------------------------
def _convert_block(node: dict, footnotes: dict[str, str]) -> str:
"""Convert a block-level AST node to TipTap XML."""
ntype = node.get("type", "")
if ntype == "heading":
return _convert_heading(node, footnotes)
if ntype == "paragraph":
return _convert_paragraph(node, footnotes)
if ntype == "list":
return _convert_list(node, footnotes)
if ntype == "block_code":
return _convert_code_block(node)
if ntype == "block_quote":
return _convert_blockquote(node, footnotes)
if ntype == "thematic_break":
return "<horizontalRule/>"
if ntype == "table":
return _convert_table(node, footnotes)
if ntype in ("blank_line", "footnotes"):
return ""
# Unknown block — try to extract as paragraph
children = node.get("children")
if children:
inline = _convert_inline_children(children, footnotes)
if inline:
return f"<paragraph>{inline}</paragraph>"
return ""
def _convert_heading(node: dict, footnotes: dict[str, str]) -> str:
level = node.get("attrs", {}).get("level", 1)
content = _convert_inline_children(node.get("children", []), footnotes)
return f'<heading level="{level}">{content}</heading>'
def _convert_paragraph(node: dict, footnotes: dict[str, str]) -> str:
content = _convert_inline_children(node.get("children", []), footnotes)
return f"<paragraph>{content}</paragraph>"
def _convert_code_block(node: dict) -> str:
info = node.get("attrs", {}).get("info", "") or ""
# Strip any extra info string tokens (e.g. "python title=foo")
language = info.split()[0] if info else ""
raw = node.get("raw", "")
# Remove trailing newline that mistune adds
if raw.endswith("\n"):
raw = raw[:-1]
escaped = html.escape(raw)
if language:
return f'<codeBlock language="{html.escape(language)}">{escaped}</codeBlock>'
return f"<codeBlock>{escaped}</codeBlock>"
def _convert_blockquote(node: dict, footnotes: dict[str, str]) -> str:
parts: list[str] = []
for child in node.get("children", []):
xml = _convert_block(child, footnotes)
if xml:
parts.append(xml)
return f'<blockquote>{"".join(parts)}</blockquote>'
def _convert_table(node: dict, footnotes: dict[str, str]) -> str:
"""Convert a table AST node to TipTap table XML.
Mistune table structure:
- table_head: contains table_cell directly (no table_row wrapper)
- table_body: contains table_row, which contains table_cell
"""
rows: list[str] = []
for section in node.get("children", []):
section_type = section.get("type", "")
if section_type == "table_head":
# Header row: table_head contains table_cell directly
cells: list[str] = []
for cell_node in section.get("children", []):
if cell_node.get("type") != "table_cell":
continue
content = _convert_inline_children(cell_node.get("children", []), footnotes)
cells.append(f"<tableHeader><paragraph>{content}</paragraph></tableHeader>")
if cells:
rows.append(f'<tableRow>{"".join(cells)}</tableRow>')
elif section_type == "table_body":
# Body rows: table_body > table_row > table_cell
for row_node in section.get("children", []):
if row_node.get("type") != "table_row":
continue
cells: list[str] = []
for cell_node in row_node.get("children", []):
if cell_node.get("type") != "table_cell":
continue
content = _convert_inline_children(cell_node.get("children", []), footnotes)
cells.append(f"<tableCell><paragraph>{content}</paragraph></tableCell>")
if cells:
rows.append(f'<tableRow>{"".join(cells)}</tableRow>')
if rows:
return f'<table>{"".join(rows)}</table>'
return ""
# ---------------------------------------------------------------------------
# List flattening
# ---------------------------------------------------------------------------
def _convert_list(node: dict, footnotes: dict[str, str], base_indent: int = 0) -> str:
"""Flatten a nested list into TipTap's flat listItem representation."""
ordered = node.get("attrs", {}).get("ordered", False)
items = _flatten_list(node, footnotes, base_indent, ordered)
return "".join(items)
def _flatten_list(
node: dict,
footnotes: dict[str, str],
indent: int,
ordered: bool,
) -> list[str]:
"""Recursively flatten a list node into flat listItem XML strings."""
items: list[str] = []
for child in node.get("children", []):
ctype = child.get("type", "")
if ctype == "task_list_item":
items.extend(_flatten_task_item(child, footnotes, indent))
elif ctype == "list_item":
items.extend(_flatten_list_item(child, footnotes, indent, ordered))
return items
def _flatten_list_item(
node: dict,
footnotes: dict[str, str],
indent: int,
ordered: bool,
) -> list[str]:
"""Convert a single list_item (possibly with nested lists) to flat items."""
items: list[str] = []
list_type = "ordered" if ordered else "bullet"
# Separate inline content from nested lists
inline_children: list[dict] = []
nested_lists: list[dict] = []
for child in node.get("children", []):
ctype = child.get("type", "")
if ctype == "list":
nested_lists.append(child)
elif ctype == "block_text":
# block_text contains inline children
inline_children.extend(child.get("children", []))
elif ctype == "paragraph":
inline_children.extend(child.get("children", []))
else:
inline_children.append(child)
# Emit the list item with inline content
content = _convert_inline_children(inline_children, footnotes) if inline_children else ""
indent_attr = f' data-indent="{indent}"' if indent > 0 else ""
items.append(
f'<listItem listType="{list_type}"{indent_attr}>'
f"<paragraph>{content}</paragraph>"
f"</listItem>"
)
# Recursively flatten nested lists at indent+1
for nested in nested_lists:
nested_ordered = nested.get("attrs", {}).get("ordered", False)
items.extend(_flatten_list(nested, footnotes, indent + 1, nested_ordered))
return items
def _flatten_task_item(
node: dict,
footnotes: dict[str, str],
indent: int,
) -> list[str]:
"""Convert a task_list_item to a TipTap taskItem."""
checked = node.get("attrs", {}).get("checked", False)
checked_str = "true" if checked else "false"
inline_children: list[dict] = []
for child in node.get("children", []):
ctype = child.get("type", "")
if ctype == "block_text":
inline_children.extend(child.get("children", []))
elif ctype == "paragraph":
inline_children.extend(child.get("children", []))
else:
inline_children.append(child)
content = _convert_inline_children(inline_children, footnotes) if inline_children else ""
indent_attr = f' data-indent="{indent}"' if indent > 0 else ""
return [
f'<taskItem checked="{checked_str}"{indent_attr}>'
f"<paragraph>{content}</paragraph>"
f"</taskItem>"
]
# ---------------------------------------------------------------------------
# Inline conversion
# ---------------------------------------------------------------------------
def _convert_inline_children(
children: list[dict],
footnotes: dict[str, str],
) -> str:
"""Convert a list of inline AST nodes to TipTap XML inline content."""
parts: list[str] = []
for child in children:
parts.append(_convert_inline(child, footnotes))
return "".join(parts)
def _convert_inline(node: dict, footnotes: dict[str, str]) -> str:
"""Convert a single inline AST node to TipTap XML."""
ntype = node.get("type", "")
if ntype == "text":
return html.escape(node.get("raw", ""))
if ntype == "strong":
content = _convert_inline_children(node.get("children", []), footnotes)
return f"<strong>{content}</strong>"
if ntype == "emphasis":
content = _convert_inline_children(node.get("children", []), footnotes)
return f"<em>{content}</em>"
if ntype == "strikethrough":
content = _convert_inline_children(node.get("children", []), footnotes)
return f"<s>{content}</s>"
if ntype == "codespan":
raw = node.get("raw", "")
return f"<code>{html.escape(raw)}</code>"
if ntype == "link":
url = node.get("attrs", {}).get("url", "")
content = _convert_inline_children(node.get("children", []), footnotes)
return f'<a href="{html.escape(url)}">{content}</a>'
if ntype == "image":
# TipTap doesn't have a standard image inline — skip gracefully
alt = _plain_text_from_node(node)
return html.escape(alt) if alt else ""
if ntype == "softbreak":
# Soft line break within a paragraph — TipTap ignores these
return " "
if ntype == "linebreak":
return "<hardBreak/>"
if ntype == "footnote_ref":
key = str(node.get("raw", ""))
fn_content = footnotes.get(key, "")
return f'<footnote data-footnote-content="{html.escape(fn_content)}"/>'
# Unknown inline — try to extract text
children = node.get("children")
if children:
return _convert_inline_children(children, footnotes)
raw = node.get("raw", "")
if raw:
return html.escape(raw)
return ""
# ---------------------------------------------------------------------------
# Markdown detection heuristic
# ---------------------------------------------------------------------------
# Patterns that unambiguously indicate markdown (not plain text)
_MARKDOWN_PATTERNS = [
# ATX headings
r"^#{1,6}\s",
# Bold/italic (must have non-space after opener)
r"\*\*\S",
r"\*\S",
# Unordered list items at start of line
r"^\s*[-*+]\s",
# Ordered list items at start of line
r"^\s*\d+\.\s",
# Fenced code blocks
r"^```",
# Links [text](url)
r"\[.+?\]\(.+?\)",
# Blockquotes
r"^>\s",
# Horizontal rules (3+ dashes/asterisks/underscores alone on line)
r"^-{3,}\s*$",
r"^\*{3,}\s*$",
r"^_{3,}\s*$",
# Task lists
r"^\s*[-*+]\s+\[[ xX]\]",
# Strikethrough
r"~~\S",
]
import re
_MARKDOWN_RE = [re.compile(p, re.MULTILINE) for p in _MARKDOWN_PATTERNS]
def looks_like_markdown(text: str) -> bool:
"""Conservative heuristic: returns True only if the text contains
unambiguous markdown patterns.
This is used by _ensure_xml() to decide whether to parse markdown
or treat the input as plain text. False negatives (markdown treated
as plain text) are safe — the text just gets wrapped in <paragraph>.
False positives (plain text parsed as markdown) would mangle content,
so we err on the side of caution.
"""
if not text or not text.strip():
return False
# If it already looks like XML, it's not markdown
if text.strip().startswith("<"):
return False
for pattern in _MARKDOWN_RE:
if pattern.search(text):
return True
return False