Deckbuilder MCP Server

formatter.py•19.8 KiB

#!/usr/bin/env python3
"""
Universal Content Formatting Module

Handles ALL formatting types independently of structure processing.
This module can be used by both JSON and Markdown processing without
circular dependencies.

Supports:
- Inline formatting: **bold**, *italic*, ___underline___, ***bold italic***
- Rich content blocks: headings, paragraphs, bullets with levels
- Table cell formatting: maintaining structure while applying formatting
- Complex combinations: mixed formatting within same text

Separated from structure handling to enable clean architecture.
"""

import re
from typing import List, Dict, Any, Union

# Inline markup patterns in order of precedence. Longer markers come first so
# that, when several patterns match at the same start position, the stable sort
# in parse_inline_formatting keeps the most specific one.
_INLINE_PATTERNS = (
    # ***___text___***
    (re.compile(r"\*\*\*___(.*?)___\*\*\*"), {"bold": True, "italic": True, "underline": True}),
    # ___***text***___
    (re.compile(r"___\*\*\*(.*?)\*\*\*___"), {"bold": True, "italic": True, "underline": True}),
    # ***text***
    (re.compile(r"\*\*\*(.*?)\*\*\*"), {"bold": True, "italic": True}),
    # ___text___
    (re.compile(r"___(.*?)___"), {"underline": True}),
    # **text**
    (re.compile(r"\*\*(.*?)\*\*"), {"bold": True}),
    # *text*
    (re.compile(r"\*(.*?)\*"), {"italic": True}),
)


class ContentFormatter:
    """Universal content formatting processor."""

    # Default content size (in points) that heading sizes are scaled from.
    _HEADING_BASE_SIZE = 18
    # Font-size multiplier per heading level (H1-H6).
    _HEADING_SCALE_FACTORS = {1: 1.33, 2: 1.11, 3: 1.0, 4: 0.89, 5: 0.78, 6: 0.67}

    def __init__(self, language_code=None, font_name=None):
        """
        Initialize the content formatter with optional language and font settings.

        Args:
            language_code: Language code applied to every run, or None to skip.
            font_name: Font name applied to every run, or None to skip.
        """
        self.language_code = language_code
        self.font_name = font_name

        # Only import FormattingSupport if formatting is actually needed, so a
        # plain formatter has no dependency on the formatting_support module.
        self.formatter = None
        if language_code is not None or font_name is not None:
            from .formatting_support import FormattingSupport

            self.formatter = FormattingSupport()

    def parse_inline_formatting(self, text: str) -> List[Dict[str, Any]]:
        """
        Parse inline formatting and return structured formatting data.

        Args:
            text: Text with inline formatting markers

        Returns:
            List of segments with text and formatting attributes. Each segment
            is ``{"text": str, "format": dict}``; every segment owns its own
            ``format`` dict, so callers may mutate one without affecting others.
        """
        if not text:
            return [{"text": "", "format": {}}]

        # Collect every match of every pattern together with its position.
        all_matches = []
        for pattern, format_dict in _INLINE_PATTERNS:
            for match in pattern.finditer(text):
                all_matches.append((match.start(), match.end(), match.group(1), format_dict))

        # Stable sort by start position: on ties the higher-precedence pattern
        # (earlier in _INLINE_PATTERNS) stays first.
        all_matches.sort(key=lambda item: item[0])

        segments = []
        last_pos = 0
        for start, end, content, format_dict in all_matches:
            # Drop matches overlapping one that was already accepted.
            if start < last_pos:
                continue

            # Plain text between the previous match and this one.
            if start > last_pos:
                segments.append({"text": text[last_pos:start], "format": {}})

            # Copy the format dict: the pattern table is shared module state
            # and must never be aliased into returned data.
            segments.append({"text": content, "format": dict(format_dict)})
            last_pos = end

        # Trailing plain text (this is the whole text when nothing matched).
        if last_pos < len(text):
            segments.append({"text": text[last_pos:], "format": {}})

        return segments

    def _add_formatted_runs(self, paragraph, segments):
        """
        Append one run per segment to a paragraph and apply its formatting.

        Args:
            paragraph: Object exposing ``add_run()``; the returned run must
                expose ``text`` and ``font.bold/italic/underline``.
            segments: Iterable of ``{"text": ..., "format": {...}}`` dicts.
                A missing or empty ``"format"`` means plain text.
        """
        for segment in segments:
            format_info = segment.get("format") or {}

            run = paragraph.add_run()
            run.text = segment["text"]

            if format_info.get("bold"):
                run.font.bold = True
            if format_info.get("italic"):
                run.font.italic = True
            if format_info.get("underline"):
                run.font.underline = True

            # Apply language and font formatting if specified
            self._apply_language_font_formatting(run)

    def apply_inline_formatting(self, text: str, paragraph) -> None:
        """
        Apply inline formatting to a paragraph, including heading detection
        and font scaling.

        Args:
            text: The text to format.
            paragraph: The paragraph to apply the formatting to.
        """
        lines = text.split("\n")

        if len(lines) > 1:
            # Multi-line text: each line becomes its own paragraph and may be
            # a heading on its own.
            self._apply_multiline_formatting_with_headings(text, paragraph)
            return

        # Single line - check if it's a heading
        line = lines[0].strip()
        heading_level = self._detect_heading_level(line)

        if heading_level:
            # Skip the '#' markers plus the single space that follows them.
            heading_text = line[heading_level + 1 :].strip()
            self._apply_heading_formatting(paragraph, heading_text, heading_level)
        else:
            # Regular text - note the original (unstripped) text is used here.
            self._add_formatted_runs(paragraph, self.parse_inline_formatting(text))

    def apply_formatted_segments_to_paragraph(self, formatted_segments, paragraph):
        """
        Apply formatted text segments to a paragraph.

        The paragraph is cleared first. Entries that are not dicts with a
        ``"text"`` key are skipped. Nothing happens (and the paragraph is left
        untouched) when ``formatted_segments`` is empty or None.

        Args:
            formatted_segments: List of ``{"text": ..., "format": {...}}`` dicts.
            paragraph: The paragraph that receives the runs.
        """
        if not formatted_segments:
            return

        # Clear existing runs
        paragraph.clear()

        usable_segments = [
            segment for segment in formatted_segments if isinstance(segment, dict) and "text" in segment
        ]
        self._add_formatted_runs(paragraph, usable_segments)

    def _apply_language_font_formatting(self, run):
        """
        Apply language and font formatting to a text run if specified.

        Args:
            run: The run to update; left untouched when the formatter was
                created without a language code or font name.
        """
        if self.formatter is None:
            return  # No formatting specified

        if self.language_code is not None:
            self.formatter.apply_language_to_run(run, self.language_code)

        if self.font_name is not None:
            self.formatter.apply_font_to_run(run, self.font_name)

    def apply_formatted_segments_to_cell(self, cell, segments):
        """
        Apply formatted text segments to a table cell.

        The cell's text frame is cleared and all runs are written into its
        first paragraph. Segments without a ``"format"`` key are written as
        plain text, and ``None`` is treated as "no segments".

        Args:
            cell: Table cell exposing ``text_frame``.
            segments: List of ``{"text": ..., "format": {...}}`` dicts.
        """
        text_frame = cell.text_frame
        text_frame.clear()

        # Reuse the first paragraph of the cleared frame.
        paragraph = text_frame.paragraphs[0]
        paragraph.text = ""

        self._add_formatted_runs(paragraph, segments or [])

    def add_content_to_placeholder(self, placeholder, content):
        """
        Add content to a placeholder, handling lists and strings.

        List content gets one newly added paragraph per item; any other
        content is converted to a string and written into the first paragraph.

        Args:
            placeholder: The placeholder to add content to.
            content: The content to add.
        """
        if isinstance(content, list):
            for item in content:
                paragraph = placeholder.text_frame.add_paragraph()
                # Stringify non-string items, mirroring the scalar branch.
                item_text = item if isinstance(item, str) else str(item)
                self.apply_inline_formatting(item_text, paragraph)
        else:
            self.apply_inline_formatting(str(content), placeholder.text_frame.paragraphs[0])

    def format_simple_content_list(self, content_list: List[str]) -> List[Dict[str, Any]]:
        """
        Convert simple content list to rich content with formatting.

        Args:
            content_list: List of strings with possible inline formatting

        Returns:
            Rich content list with formatting preserved. Strings become
            ``{"paragraph": text, "formatted": segments}``; other items are
            passed through unchanged.
        """
        if not content_list:
            return []

        rich_content = []
        for item in content_list:
            if isinstance(item, str):
                # Treat as paragraph text with inline formatting
                rich_content.append({"paragraph": item, "formatted": self.parse_inline_formatting(item)})
            else:
                # Keep non-string items as-is
                rich_content.append(item)

        return rich_content

    def format_field_content(self, content: Union[str, List[str], Dict[str, Any]]) -> Dict[str, Any]:
        """
        Format any field content type with inline formatting preservation.

        Args:
            content: Content to format (string, list, or dict)

        Returns:
            Formatted content with structure preserved:
            - str:  ``{"text", "formatted"}``
            - list: ``{"list", "formatted_list"}``
            - dict: same keys, string values wrapped as ``{"text", "formatted"}``
            - anything else: ``{"raw": content}``
        """
        if isinstance(content, str):
            return {"text": content, "formatted": self.parse_inline_formatting(content)}

        if isinstance(content, list):
            return {
                "list": content,
                "formatted_list": [
                    {
                        "text": item,
                        "formatted": (self.parse_inline_formatting(item) if isinstance(item, str) else item),
                    }
                    for item in content
                ],
            }

        if isinstance(content, dict):
            # Only top-level string values are formatted; nested containers
            # are passed through untouched.
            formatted_dict = {}
            for key, value in content.items():
                if isinstance(value, str):
                    formatted_dict[key] = {
                        "text": value,
                        "formatted": self.parse_inline_formatting(value),
                    }
                else:
                    formatted_dict[key] = value
            return formatted_dict

        # Return non-string/list/dict content as-is
        return {"raw": content}

    def format_table_data(self, table_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Format table data while preserving structure.

        Args:
            table_data: Table configuration with data

        Returns:
            Table data with formatting applied to cells. Input that is not a
            dict, or has no ``"data"`` key, is returned unchanged. The input
            dict itself is not modified (a shallow copy is returned).
        """
        if not isinstance(table_data, dict) or "data" not in table_data:
            return table_data

        formatted_table = table_data.copy()

        rows = table_data["data"]
        if isinstance(rows, list):
            formatted_rows = []
            for row in rows:
                if not isinstance(row, list):
                    # Keep non-list rows as-is
                    formatted_rows.append(row)
                    continue

                formatted_rows.append(
                    [
                        # String cells get parsed; other cells stay as-is.
                        {"text": cell, "formatted": self.parse_inline_formatting(cell)}
                        if isinstance(cell, str)
                        else cell
                        for cell in row
                    ]
                )
            formatted_table["data"] = formatted_rows

        return formatted_table

    def format_rich_content_blocks(self, rich_content: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Format rich content blocks (headings, paragraphs, bullets).

        Args:
            rich_content: List of rich content blocks

        Returns:
            Rich content with formatting applied. Each dict block is shallow
            copied and gains ``heading_formatted`` / ``paragraph_formatted`` /
            ``bullets_formatted`` for the keys it contains; non-dict blocks are
            passed through unchanged.
        """
        if not rich_content:
            return []

        formatted_blocks = []
        for block in rich_content:
            if not isinstance(block, dict):
                formatted_blocks.append(block)
                continue

            formatted_block = block.copy()

            # Format headings
            if "heading" in block:
                formatted_block["heading_formatted"] = self.parse_inline_formatting(block["heading"])

            # Format paragraphs
            if "paragraph" in block:
                formatted_block["paragraph_formatted"] = self.parse_inline_formatting(block["paragraph"])

            # Format bullets (string bullets are parsed, others kept as-is)
            if "bullets" in block and isinstance(block["bullets"], list):
                formatted_block["bullets_formatted"] = [
                    {"text": bullet, "formatted": self.parse_inline_formatting(bullet)}
                    if isinstance(bullet, str)
                    else bullet
                    for bullet in block["bullets"]
                ]

            formatted_blocks.append(formatted_block)

        return formatted_blocks

    def format_slide_data(self, slide_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Format complete slide data with ALL formatting types preserved.
        This is the main entry point for slide formatting.

        Args:
            slide_data: Raw slide data dictionary

        Returns:
            Slide data with all formatting applied. Original keys are kept and
            ``*_formatted`` / ``rich_content`` keys are added alongside them.

        Raises:
            TypeError: If ``slide_data`` is not a dictionary.
        """
        if not isinstance(slide_data, dict):
            raise TypeError(f"slide_data must be a dictionary, got {type(slide_data).__name__}")

        # Create a (shallow) copy to avoid modifying the original
        formatted_data = slide_data.copy()

        # Format title and subtitle
        for field in ["title", "subtitle"]:
            if field in formatted_data and formatted_data[field]:
                formatted_data[f"{field}_formatted"] = self.parse_inline_formatting(formatted_data[field])

        # Format simple content lists to rich content; the original "content"
        # key is kept for compatibility.
        if "content" in formatted_data and isinstance(formatted_data["content"], list):
            formatted_data["rich_content"] = self.format_simple_content_list(formatted_data["content"])

        # Format rich content blocks
        if "rich_content" in formatted_data:
            formatted_data["rich_content_formatted"] = self.format_rich_content_blocks(
                formatted_data["rich_content"]
            )

        # Format table data
        if "table" in formatted_data:
            formatted_data["table_formatted"] = self.format_table_data(formatted_data["table"])

        # Format all field content (for semantic field names like content_left,
        # content_col1). The key list is materialized before new keys are added.
        content_fields = [
            key
            for key in formatted_data
            if key.startswith(("content_", "title_")) and not key.endswith("_formatted")
        ]

        for field in content_fields:
            content = formatted_data[field]
            if content:  # Only format non-empty content
                formatted_data[f"{field}_formatted"] = self.format_field_content(content)

        return formatted_data

    def _detect_heading_level(self, line: str) -> int:
        """
        Detect heading level from line text (H1-H6).

        Args:
            line: A single line of text.

        Returns:
            1-6 when the line starts with that many ``#`` characters followed
            by a space, otherwise 0.
        """
        remainder = line.lstrip("#")
        level = len(line) - len(remainder)
        if 1 <= level <= 6 and remainder.startswith(" "):
            return level
        return 0

    def _apply_heading_formatting(self, paragraph, text_content, heading_level):
        """
        Apply heading formatting with font scaling and proper weights.

        Args:
            paragraph: The paragraph that receives the heading runs.
            text_content: Heading text without the leading ``#`` markers.
            heading_level: Heading level; unknown levels use a scale of 1.0.
        """
        # Imported lazily so the pure parsing functions work without python-pptx.
        from pptx.util import Pt

        # Apply inline formatting first (bold, italic, underline)
        self._add_formatted_runs(paragraph, self.parse_inline_formatting(text_content))

        # Calculate scaled font size
        heading_font_size = self._HEADING_BASE_SIZE * self._HEADING_SCALE_FACTORS.get(heading_level, 1.0)

        # H1 and H2 use a light weight, H3-H6 a bold weight. This deliberately
        # overrides any inline bold set above, and applies to every run of the
        # paragraph.
        use_bold = heading_level not in (1, 2)
        for run in paragraph.runs:
            run.font.bold = use_bold
            run.font.size = Pt(heading_font_size)

    def _apply_multiline_formatting_with_headings(self, text, paragraph):
        """
        Handle multi-line text with heading detection.

        The text frame owning ``paragraph`` is cleared, then every non-blank
        line is written into its own paragraph (as a heading or as regular
        inline-formatted text). Blank lines are skipped.

        Args:
            text: Text containing one or more newline characters.
            paragraph: A paragraph inside the text frame to fill.
        """
        # Get text frame for adding multiple paragraphs.
        # NOTE(review): relies on the private ``_parent`` attribute of the
        # paragraph object - confirm against the python-pptx version in use.
        text_frame = paragraph._parent

        # Clear existing content
        text_frame.clear()

        first_paragraph_used = False
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            # The first non-blank line reuses the paragraph left behind by
            # clear(); keying this on the line index would leave an empty
            # paragraph in front whenever the text starts with a blank line.
            if first_paragraph_used:
                current_paragraph = text_frame.add_paragraph()
            else:
                current_paragraph = text_frame.paragraphs[0]
                first_paragraph_used = True

            heading_level = self._detect_heading_level(line)
            if heading_level:
                # Skip the '#' markers plus the single space that follows them.
                heading_text = line[heading_level + 1 :].strip()
                self._apply_heading_formatting(current_paragraph, heading_text, heading_level)
            else:
                # Regular text (language/font settings are applied here too,
                # consistent with the single-line path).
                self._add_formatted_runs(current_paragraph, self.parse_inline_formatting(line))


# REMOVED: parse_table_markdown_with_formatting() - complex markdown parsing
# Replaced with plain text processing in TableHandler for 50%+ performance improvement
# Table cells now contain plain text only, markdown treated as literal characters


# Global formatter instance factory
def get_content_formatter(language_code=None, font_name=None):
    """
    Get a ContentFormatter instance with optional formatting parameters.

    Args:
        language_code: Language code applied to every run, or None.
        font_name: Font name applied to every run, or None.

    Returns:
        A new ContentFormatter.
    """
    return ContentFormatter(language_code=language_code, font_name=font_name)


# Default formatter instance for backward compatibility
content_formatter = ContentFormatter()


def format_slide_content(slide_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convenience function for formatting slide content.

    Args:
        slide_data: Raw slide data

    Returns:
        Formatted slide data
    """
    return content_formatter.format_slide_data(slide_data)


def format_inline_text(text: str) -> List[Dict[str, Any]]:
    """
    Convenience function for formatting inline text.

    Args:
        text: Text with inline formatting

    Returns:
        List of formatted segments
    """
    return content_formatter.parse_inline_formatting(text)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/teknologika/Deckbuilder'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

formatter.py•19.8 KiB