PowerPoint Translator

MIT License

Overview InspectNew Endpoints Schema Related Servers Reviews Score

ppt-translator
ppt_translator

ppt_handler.py•58.5 kB

""" Optimized PowerPoint document handling and text frame updates """ import logging import re from typing import List, Dict, Any, Tuple, Optional from dataclasses import dataclass from pathlib import Path from pptx.dml.color import RGBColor from .config import Config from .dependencies import DependencyManager from .translation_engine import TranslationEngine from .text_utils import SlideTextCollector from .post_processing import PostProcessor logger = logging.getLogger(__name__) @dataclass class TranslationResult: """Data class for translation results""" translated_count: int = 0 translated_notes_count: int = 0 total_shapes: int = 0 errors: List[str] = None def __post_init__(self): if self.errors is None: self.errors = [] class FormattingExtractor: """Handles extraction of formatting information from PowerPoint elements""" @staticmethod def extract_paragraph_structure(text_frame): """Extract paragraph structure including bullets, indentation, margins, and formatting""" paragraph_info = [] try: for paragraph in text_frame.paragraphs: info = FormattingExtractor._extract_single_paragraph_info(paragraph) paragraph_info.append(info) except Exception as e: logger.error(f"Error extracting paragraph structure: {e}") return paragraph_info @staticmethod def _extract_single_paragraph_info(paragraph): """Extract information from a single paragraph""" info = { 'level': getattr(paragraph, 'level', 0), 'text': paragraph.text, 'runs': [], 'bullet_format': None, 'alignment': getattr(paragraph, 'alignment', None), 'space_before': getattr(paragraph, 'space_before', None), 'space_after': getattr(paragraph, 'space_after', None), 'line_spacing': getattr(paragraph, 'line_spacing', None), 'margin_left': None, 'indent': None } # Extract XML-based formatting FormattingExtractor._extract_xml_formatting(paragraph, info) # Extract run formatting for run in paragraph.runs: if run.text.strip(): run_info = FormattingExtractor._extract_run_info(run) info['runs'].append(run_info) return info 
@staticmethod def _extract_xml_formatting(paragraph, info): """Extract formatting from paragraph XML""" try: if not (hasattr(paragraph, '_element') and paragraph._element is not None): return pPr = paragraph._element.find('.//{http://schemas.openxmlformats.org/drawingml/2006/main}pPr') if pPr is None: return # Extract margin and indent for attr, key in [('marL', 'margin_left'), ('indent', 'indent'), ('algn', 'alignment_xml')]: value = pPr.get(attr) if value: info[key] = value # Extract bullet format info['bullet_format'] = FormattingExtractor._extract_bullet_format(pPr, paragraph) except Exception as e: logger.debug(f"Could not extract paragraph XML info: {e}") # Fallback for indented paragraphs if paragraph.level > 0: info['bullet_format'] = {'type': 'char', 'char': '•'} @staticmethod def _extract_bullet_format(pPr, paragraph): """Extract bullet format from paragraph properties""" bullet_elements = [ ('buNone', 'none'), ('buChar', 'char'), ('buAutoNum', 'autonum') ] for elem_name, bullet_type in bullet_elements: elem = pPr.find(f'.//{{{FormattingExtractor._get_namespace()}}}{elem_name}') if elem is not None: return FormattingExtractor._create_bullet_format(elem, bullet_type) # Default for indented paragraphs if paragraph.level > 0: return {'type': 'char', 'char': '•'} return None @staticmethod def _create_bullet_format(elem, bullet_type): """Create bullet format dictionary from XML element""" if bullet_type == 'none': return {'type': 'none'} elif bullet_type == 'char': return {'type': 'char', 'char': elem.get('char', '•')} elif bullet_type == 'autonum': return { 'type': 'autonum', 'num_type': elem.get('type', 'arabicPeriod'), 'start_at': elem.get('startAt', '1') } return None @staticmethod def _extract_run_info(run): """Extract information from a single run""" run_info = { 'text': run.text, 'formatting': FormattingExtractor._extract_run_formatting(run) } # Check for hyperlinks try: if hasattr(run, 'hyperlink') and run.hyperlink: if hasattr(run.hyperlink, 
'address') and run.hyperlink.address: run_info['hyperlink'] = run.hyperlink.address except Exception: pass return run_info @staticmethod def _extract_run_formatting(run) -> Dict[str, Any]: """Extract formatting from a run safely""" formatting = { 'font_name': None, 'font_size': None, 'font_bold': None, 'font_italic': None, 'font_color': None } try: if not (hasattr(run, 'font') and run.font): return formatting font = run.font # Extract basic font properties for attr, key in [('name', 'font_name'), ('size', 'font_size'), ('bold', 'font_bold'), ('italic', 'font_italic')]: if hasattr(font, attr): value = getattr(font, attr) if value is not None: formatting[key] = value # Extract color formatting['font_color'] = FormattingExtractor._extract_font_color(font) except Exception as e: logger.debug(f"Could not extract run formatting: {e}") return formatting @staticmethod def _extract_font_color(font): """Extract font color information with enhanced preservation""" try: if not (hasattr(font, 'color') and font.color): return None color_obj = font.color # Debug logging if hasattr(color_obj, 'type'): logger.debug(f"Color type detected: {color_obj.type}") # Check for RGB color (MSO_COLOR_TYPE.RGB = 1) if hasattr(color_obj, 'type') and color_obj.type == 1: if hasattr(color_obj, 'rgb') and color_obj.rgb: # Extract RGB as individual components for better preservation rgb = color_obj.rgb try: if hasattr(rgb, 'r') and hasattr(rgb, 'g') and hasattr(rgb, 'b'): rgb_info = {'r': rgb.r, 'g': rgb.g, 'b': rgb.b} else: # Handle RGBColor object directly rgb_info = {'r': rgb.r, 'g': rgb.g, 'b': rgb.b} except AttributeError: # Handle string RGB values (hex format like 'FFFF00') try: rgb_str = str(rgb) if len(rgb_str) == 6 and all(c in '0123456789ABCDEFabcdef' for c in rgb_str): # Parse hex string rgb_val = int(rgb_str, 16) rgb_info = { 'r': (rgb_val >> 16) & 0xFF, 'g': (rgb_val >> 8) & 0xFF, 'b': rgb_val & 0xFF } else: # Try to parse as integer rgb_val = int(rgb_str) if rgb_str.isdigit() else 
int(rgb) rgb_info = { 'r': (rgb_val >> 16) & 0xFF, 'g': (rgb_val >> 8) & 0xFF, 'b': rgb_val & 0xFF } except (ValueError, TypeError) as e: logger.debug(f"Could not parse RGB value: {rgb}, error: {e}") return None logger.debug(f"Extracted RGB color: {rgb_info}") return ('rgb', rgb_info) # Check for theme color (MSO_COLOR_TYPE.THEME = 2) elif hasattr(color_obj, 'type') and color_obj.type == 2: if hasattr(color_obj, 'theme_color'): theme_info = {'theme_color': color_obj.theme_color} # Preserve brightness adjustments (tint/shade) if hasattr(color_obj, 'brightness') and color_obj.brightness is not None: theme_info['brightness'] = color_obj.brightness logger.debug(f"Extracted theme color: {theme_info}") return ('theme', theme_info) # Check for scheme color (MSO_COLOR_TYPE.SCHEME = 3) elif hasattr(color_obj, 'type') and color_obj.type == 3: if hasattr(color_obj, 'scheme_color'): logger.debug(f"Extracted scheme color: {color_obj.scheme_color}") return ('scheme', color_obj.scheme_color) # Fallback: try to get RGB directly if available elif hasattr(color_obj, 'rgb') and color_obj.rgb: rgb = color_obj.rgb try: if hasattr(rgb, 'r') and hasattr(rgb, 'g') and hasattr(rgb, 'b'): rgb_info = {'r': rgb.r, 'g': rgb.g, 'b': rgb.b} else: rgb_info = {'r': rgb.r, 'g': rgb.g, 'b': rgb.b} except AttributeError: try: rgb_str = str(rgb) if len(rgb_str) == 6 and all(c in '0123456789ABCDEFabcdef' for c in rgb_str): rgb_val = int(rgb_str, 16) rgb_info = { 'r': (rgb_val >> 16) & 0xFF, 'g': (rgb_val >> 8) & 0xFF, 'b': rgb_val & 0xFF } else: rgb_val = int(rgb_str) if rgb_str.isdigit() else int(rgb) rgb_info = { 'r': (rgb_val >> 16) & 0xFF, 'g': (rgb_val >> 8) & 0xFF, 'b': rgb_val & 0xFF } except (ValueError, TypeError) as e: logger.debug(f"Could not parse fallback RGB value: {rgb}, error: {e}") return None logger.debug(f"Extracted fallback RGB color: {rgb_info}") return ('rgb', rgb_info) except Exception as e: logger.debug(f"Error extracting font color: {e}") return None @staticmethod def 
_get_namespace(): """Get the OpenXML namespace for drawing ML""" return "http://schemas.openxmlformats.org/drawingml/2006/main" class FormattingApplier: """Handles application of formatting to PowerPoint elements""" @staticmethod def apply_paragraph_structure(paragraph, para_info, new_text: str, target_language: str = None): """Apply paragraph structure and formatting with language-specific font""" try: # Clear paragraph content paragraph.clear() # Apply paragraph-level properties FormattingApplier._apply_paragraph_properties(paragraph, para_info) # Apply text with run formatting if para_info and para_info.get('runs'): FormattingApplier._apply_runs_with_formatting(paragraph, new_text, para_info['runs'], target_language) else: # Simple text run = paragraph.add_run() run.text = new_text if para_info and para_info.get('runs'): FormattingApplier._apply_run_formatting(run, para_info['runs'][0]['formatting'], target_language) elif target_language: # Apply language-specific font even without existing formatting language_font = Config.get_font_for_language(target_language) run.font.name = language_font logger.debug(f"Applied default font '{language_font}' for language '{target_language}'") except Exception as e: logger.error(f"Failed to apply paragraph structure: {e}") # Fallback paragraph.clear() run = paragraph.add_run() run.text = new_text if target_language: try: language_font = Config.get_font_for_language(target_language) run.font.name = language_font except Exception: pass @staticmethod def _apply_paragraph_properties(paragraph, para_info): """Apply paragraph-level properties""" if not para_info: return # Set level (indentation) if para_info.get('level') is not None: paragraph.level = para_info['level'] # Apply XML-based formatting FormattingApplier._apply_xml_formatting(paragraph, para_info) # Apply other properties for prop, key in [('alignment', 'alignment'), ('space_before', 'space_before'), ('space_after', 'space_after'), ('line_spacing', 'line_spacing')]: if 
para_info.get(key) is not None: try: setattr(paragraph, prop, para_info[key]) except Exception: pass # Apply bullet formatting FormattingApplier._apply_bullet_format(paragraph, para_info.get('bullet_format')) @staticmethod def _apply_xml_formatting(paragraph, para_info): """Apply XML-based formatting (margins, indents)""" try: if not (hasattr(paragraph, '_element') and paragraph._element is not None): return pPr = paragraph._element.find('.//{http://schemas.openxmlformats.org/drawingml/2006/main}pPr') if pPr is None: from lxml import etree pPr = etree.SubElement(paragraph._element, '{http://schemas.openxmlformats.org/drawingml/2006/main}pPr') # Apply margin and indent for xml_attr, info_key in [('marL', 'margin_left'), ('indent', 'indent'), ('algn', 'alignment_xml')]: if para_info.get(info_key): pPr.set(xml_attr, para_info[info_key]) except Exception as e: logger.debug(f"Could not apply XML formatting: {e}") @staticmethod def _apply_bullet_format(paragraph, bullet_format): """Apply bullet formatting to paragraph""" if not bullet_format: return try: FormattingApplier._apply_bullet_xml(paragraph, bullet_format) except Exception as e: logger.debug(f"Could not apply bullet format: {e}") @staticmethod def _apply_bullet_xml(paragraph, bullet_format): """Apply bullet formatting via XML manipulation""" if not (hasattr(paragraph, '_element') and paragraph._element is not None): return pPr = paragraph._element.find('.//{http://schemas.openxmlformats.org/drawingml/2006/main}pPr') if pPr is None: from lxml import etree pPr = etree.SubElement(paragraph._element, '{http://schemas.openxmlformats.org/drawingml/2006/main}pPr') # Remove existing bullet elements namespace = FormattingExtractor._get_namespace() for elem_name in ['buNone', 'buChar', 'buAutoNum']: for elem in pPr.findall(f'.//{{{namespace}}}{elem_name}'): pPr.remove(elem) # Add new bullet element bullet_type = bullet_format.get('type') if bullet_type == 'none': FormattingApplier._add_bullet_none(pPr, namespace) elif 
bullet_type == 'char': FormattingApplier._add_bullet_char(pPr, namespace, bullet_format.get('char', '•')) elif bullet_type == 'autonum': FormattingApplier._add_bullet_autonum(pPr, namespace, bullet_format) @staticmethod def _add_bullet_none(pPr, namespace): """Add buNone element""" from lxml import etree etree.SubElement(pPr, f'{{{namespace}}}buNone') @staticmethod def _add_bullet_char(pPr, namespace, char): """Add buChar element""" from lxml import etree buChar = etree.SubElement(pPr, f'{{{namespace}}}buChar') buChar.set('char', char) @staticmethod def _add_bullet_autonum(pPr, namespace, bullet_format): """Add buAutoNum element""" from lxml import etree buAutoNum = etree.SubElement(pPr, f'{{{namespace}}}buAutoNum') buAutoNum.set('type', bullet_format.get('num_type', 'arabicPeriod')) start_at = bullet_format.get('start_at', '1') if start_at != '1': buAutoNum.set('startAt', start_at) @staticmethod def _apply_runs_with_formatting(paragraph, new_text: str, run_info_list, target_language: str = None): """Apply text with preserved run formatting and language-specific font""" try: if len(run_info_list) == 1: # Single run - simple case run = paragraph.add_run() run.text = new_text FormattingApplier._apply_run_formatting(run, run_info_list[0]['formatting'], target_language) else: # Multiple runs - preserve special formatting FormattingApplier._apply_multiple_runs(paragraph, new_text, run_info_list, target_language) except Exception as e: logger.error(f"Error applying runs with formatting: {e}") # Fallback if not paragraph.runs: run = paragraph.add_run() run.text = new_text if run_info_list: FormattingApplier._apply_run_formatting(run, run_info_list[0]['formatting'], target_language) elif target_language: try: language_font = Config.get_font_for_language(target_language) run.font.name = language_font except Exception: pass @staticmethod def _apply_multiple_runs(paragraph, new_text: str, run_info_list, target_language: str = None): """Apply multiple runs with different 
formatting and language-specific font""" remaining_text = new_text for run_info in run_info_list: original_text = run_info['text'].strip() if original_text and original_text in remaining_text: parts = remaining_text.split(original_text, 1) # Text before this run if parts[0]: run = paragraph.add_run() run.text = parts[0] FormattingApplier._apply_run_formatting(run, run_info_list[0]['formatting'], target_language) # This run with its formatting run = paragraph.add_run() run.text = original_text FormattingApplier._apply_run_formatting(run, run_info['formatting'], target_language) # Apply hyperlink if present if 'hyperlink' in run_info: try: run.hyperlink.address = run_info['hyperlink'] except Exception: pass # Update remaining text remaining_text = parts[1] if len(parts) > 1 else "" # Add any remaining text if remaining_text: run = paragraph.add_run() run.text = remaining_text FormattingApplier._apply_run_formatting(run, run_info_list[-1]['formatting'], target_language) # If no runs were added, add the whole text if not paragraph.runs: run = paragraph.add_run() run.text = new_text FormattingApplier._apply_run_formatting(run, run_info_list[0]['formatting'], target_language) @staticmethod def _apply_run_formatting(run, formatting: Dict[str, Any], target_language: str = None): """Apply formatting to a run safely with language-specific font""" try: if not (hasattr(run, 'font') and run.font): return font = run.font # Apply language-specific font if target language is provided if target_language: language_font = Config.get_font_for_language(target_language) font.name = language_font logger.debug(f"Applied font '{language_font}' for language '{target_language}'") # Apply basic properties (but preserve original font if no target language) for key, attr in [('font_size', 'size'), ('font_bold', 'bold'), ('font_italic', 'italic')]: if formatting.get(key) is not None: setattr(font, attr, formatting[key]) # Apply font name only if not overridden by language-specific font if not 
target_language and formatting.get('font_name') is not None: font.name = formatting['font_name'] # Apply color - preserve original color for better visibility FormattingApplier._apply_font_color(font, formatting.get('font_color')) except Exception as e: logger.debug(f"Could not apply run formatting: {e}") @staticmethod def _apply_font_color(font, color_info): """Apply font color information with enhanced preservation""" if not color_info or not isinstance(color_info, tuple) or len(color_info) != 2: return try: color_type, color_value = color_info logger.debug(f"Applying color - type: {color_type}, value: {color_value}") if color_type == 'rgb' and color_value: FormattingApplier._apply_rgb_color(font, color_value) elif color_type == 'theme' and color_value: FormattingApplier._apply_theme_color(font, color_value) elif color_type == 'scheme' and color_value: FormattingApplier._apply_scheme_color(font, color_value) except Exception as e: logger.debug(f"Error applying font color: {e}") @staticmethod def _apply_rgb_color(font, color_value): """Apply RGB color to font with enhanced handling""" try: if isinstance(color_value, dict): # New format with individual RGB components r = color_value.get('r', 0) g = color_value.get('g', 0) b = color_value.get('b', 0) logger.debug(f"Applying RGB color: R={r}, G={g}, B={b}") font.color.rgb = RGBColor(r, g, b) elif isinstance(color_value, str) and len(color_value) == 6: # Legacy format - hex string rgb_int = int(color_value, 16) r = (rgb_int >> 16) & 0xFF g = (rgb_int >> 8) & 0xFF b = rgb_int & 0xFF logger.debug(f"Applying RGB color from hex: R={r}, G={g}, B={b}") font.color.rgb = RGBColor(r, g, b) elif isinstance(color_value, int): # Integer RGB value r = (color_value >> 16) & 0xFF g = (color_value >> 8) & 0xFF b = color_value & 0xFF logger.debug(f"Applying RGB color from int: R={r}, G={g}, B={b}") font.color.rgb = RGBColor(r, g, b) except Exception as e: logger.debug(f"Error applying RGB color: {e}") @staticmethod def 
_apply_theme_color(font, theme_info): """Apply theme color with brightness adjustments""" try: if isinstance(theme_info, dict): theme_color = theme_info.get('theme_color') if theme_color is not None: logger.debug(f"Applying theme color: {theme_color}") font.color.theme_color = theme_color # Apply brightness adjustment if available brightness = theme_info.get('brightness') if brightness is not None: logger.debug(f"Applying brightness: {brightness}") font.color.brightness = brightness else: # Legacy format - direct theme color value logger.debug(f"Applying legacy theme color: {theme_info}") font.color.theme_color = theme_info except Exception as e: logger.debug(f"Error applying theme color: {e}") @staticmethod def _apply_scheme_color(font, scheme_color): """Apply scheme color""" try: logger.debug(f"Applying scheme color: {scheme_color}") font.color.scheme_color = scheme_color except Exception as e: logger.debug(f"Error applying scheme color: {e}") class TextFrameUpdater: """Handles updating PowerPoint text frames with translations""" @staticmethod def update_text_frame(text_frame, new_text: str, target_language: str = None): """Update text frame while preserving formatting, bullets, and indentation with language-specific font""" try: if not text_frame.paragraphs: text_frame.text = new_text # Apply language-specific font to simple text if target_language and text_frame.paragraphs: try: language_font = Config.get_font_for_language(target_language) for paragraph in text_frame.paragraphs: for run in paragraph.runs: run.font.name = language_font except Exception: pass return # Extract paragraph structure information paragraph_info = FormattingExtractor.extract_paragraph_structure(text_frame) # Check for hyperlinks has_hyperlinks = TextFrameUpdater._has_hyperlinks(text_frame) if has_hyperlinks: logger.debug("Hyperlinks detected, using safe hyperlink preservation") TextFrameUpdater._update_with_hyperlinks_safe(text_frame, new_text, paragraph_info, target_language) return # 
Choose update strategy based on structure TextFrameUpdater._choose_update_strategy(text_frame, new_text, paragraph_info, target_language) except Exception as e: logger.error(f"Formatting error: {str(e)}") text_frame.text = new_text # Apply language-specific font to fallback text if target_language and text_frame.paragraphs: try: language_font = Config.get_font_for_language(target_language) for paragraph in text_frame.paragraphs: for run in paragraph.runs: run.font.name = language_font except Exception: pass @staticmethod def _choose_update_strategy(text_frame, new_text: str, paragraph_info, target_language: str = None): """Choose the appropriate update strategy with language-specific font""" new_lines = new_text.strip().split('\n') # Single paragraph case if len(text_frame.paragraphs) == 1 and len(new_lines) == 1: para_info = paragraph_info[0] if paragraph_info else None FormattingApplier.apply_paragraph_structure(text_frame.paragraphs[0], para_info, new_text.strip(), target_language) return # Multiple paragraphs with same count if len(new_lines) == len(text_frame.paragraphs): TextFrameUpdater._update_matching_paragraphs(text_frame, new_lines, paragraph_info, target_language) else: # Different structure - rebuild with preserved formatting TextFrameUpdater._rebuild_with_structure(text_frame, new_text, paragraph_info, target_language) @staticmethod def _update_matching_paragraphs(text_frame, new_lines, paragraph_info, target_language: str = None): """Update paragraphs when counts match with language-specific font""" for i, (paragraph, new_line) in enumerate(zip(text_frame.paragraphs, new_lines)): if new_line.strip(): para_info = paragraph_info[i] if i < len(paragraph_info) else None FormattingApplier.apply_paragraph_structure(paragraph, para_info, new_line.strip(), target_language) @staticmethod def _rebuild_with_structure(text_frame, new_text: str, paragraph_info, target_language: str = None): """Rebuild text frame with preserved structure and language-specific 
font""" try: text_frame.clear() new_lines = new_text.strip().split('\n') for i, line in enumerate(new_lines): if i > 0: paragraph = text_frame.add_paragraph() else: paragraph = text_frame.paragraphs[0] # Use corresponding paragraph info if available para_info = paragraph_info[i] if i < len(paragraph_info) else (paragraph_info[0] if paragraph_info else None) FormattingApplier.apply_paragraph_structure(paragraph, para_info, line.strip(), target_language) except Exception as e: logger.error(f"Structure rebuild failed: {e}") text_frame.text = new_text # Apply language-specific font to fallback text if target_language and text_frame.paragraphs: try: language_font = Config.get_font_for_language(target_language) for paragraph in text_frame.paragraphs: for run in paragraph.runs: run.font.name = language_font except Exception: pass @staticmethod def _update_with_hyperlinks_safe(text_frame, new_text: str, paragraph_info=None, target_language: str = None): """Update text frame while preserving hyperlinks and structure with language-specific font""" try: new_lines = new_text.strip().split('\n') for i, line in enumerate(new_lines): if i < len(text_frame.paragraphs): paragraph = text_frame.paragraphs[i] else: paragraph = text_frame.add_paragraph() # Get paragraph info para_info = paragraph_info[i] if paragraph_info and i < len(paragraph_info) else None # Clear and apply structure paragraph.clear() if para_info: FormattingApplier._apply_paragraph_properties(paragraph, para_info) TextFrameUpdater._apply_hyperlinks_to_paragraph(paragraph, line.strip(), para_info, target_language) else: run = paragraph.add_run() run.text = line.strip() if target_language: try: language_font = Config.get_font_for_language(target_language) run.font.name = language_font except Exception: pass except Exception as e: logger.error(f"Safe hyperlink preservation failed: {e}") text_frame.text = new_text # Apply language-specific font to fallback text if target_language and text_frame.paragraphs: try: 
language_font = Config.get_font_for_language(target_language) for paragraph in text_frame.paragraphs: for run in paragraph.runs: run.font.name = language_font except Exception: pass @staticmethod def _apply_hyperlinks_to_paragraph(paragraph, line: str, para_info, target_language: str = None): """Apply hyperlinks to paragraph with structure preservation and language-specific font""" try: runs_info = para_info.get('runs', []) hyperlink_runs = [run for run in runs_info if run.get('hyperlink')] if not hyperlink_runs: # No hyperlinks, just add text with formatting run = paragraph.add_run() run.text = line if runs_info: FormattingApplier._apply_run_formatting(run, runs_info[0]['formatting'], target_language) elif target_language: try: language_font = Config.get_font_for_language(target_language) run.font.name = language_font except Exception: pass return # Apply hyperlinks remaining_text = line for hyperlink_run in hyperlink_runs: original_text = hyperlink_run['text'].strip() hyperlink_url = hyperlink_run['hyperlink'] # Find hyperlink text in translated line hyperlink_text = TextFrameUpdater._find_hyperlink_text(remaining_text, original_text) if hyperlink_text and hyperlink_text in remaining_text: parts = remaining_text.split(hyperlink_text, 1) # Text before hyperlink if parts[0]: run = paragraph.add_run() run.text = parts[0] default_formatting = next((r['formatting'] for r in runs_info if not r.get('hyperlink')), runs_info[0]['formatting'] if runs_info else {}) FormattingApplier._apply_run_formatting(run, default_formatting, target_language) # Hyperlink text run = paragraph.add_run() run.text = hyperlink_text FormattingApplier._apply_run_formatting(run, hyperlink_run['formatting'], target_language) # Apply hyperlink try: run.hyperlink.address = hyperlink_url logger.debug(f"Applied hyperlink: '{hyperlink_text}' -> {hyperlink_url}") except Exception as e: logger.debug(f"Could not apply hyperlink: {e}") remaining_text = parts[1] if len(parts) > 1 else "" break # Add 
remaining text if remaining_text: run = paragraph.add_run() run.text = remaining_text default_formatting = next((r['formatting'] for r in runs_info if not r.get('hyperlink')), runs_info[0]['formatting'] if runs_info else {}) FormattingApplier._apply_run_formatting(run, default_formatting, target_language) except Exception as e: logger.error(f"Error applying hyperlinks: {e}") if not paragraph.runs: run = paragraph.add_run() run.text = line if para_info.get('runs'): FormattingApplier._apply_run_formatting(run, para_info['runs'][0]['formatting'], target_language) elif target_language: try: language_font = Config.get_font_for_language(target_language) run.font.name = language_font except Exception: pass @staticmethod def _has_hyperlinks(text_frame): """Check if text frame contains hyperlinks""" try: for paragraph in text_frame.paragraphs: for run in paragraph.runs: if hasattr(run, 'hyperlink') and run.hyperlink: if hasattr(run.hyperlink, 'address') and run.hyperlink.address: return True except Exception: pass return False @staticmethod def _find_hyperlink_text(translated_text: str, original_text: str): """Find text that should be hyperlinked""" # First try exact match if original_text in translated_text: return original_text # Common hyperlink patterns patterns = [ 'Boto3', 'Code samples', 'Starter Toolkit', 'samples', 'toolkit', '코드 샘플', '샘플', '툴킷', '스타터', 'Boto3', '코드' ] words = translated_text.split() for pattern in patterns: for word in words: if pattern.lower() in word.lower() or word.lower() in pattern.lower(): return word # Return first meaningful word meaningful_words = [word for word in words if len(word) > 2] return meaningful_words[0] if meaningful_words else None class ComplexityAnalyzer: """Analyzes slide complexity to determine translation strategy""" @staticmethod def slide_has_complex_formatting(text_items: List[Dict]) -> bool: """Check if slide has complex formatting including bullets and indentation""" for item in text_items: if item['type'] == 
'text_frame_unified': if ComplexityAnalyzer._text_frame_has_complex_formatting(item['text_frame']): return True return False @staticmethod def _text_frame_has_complex_formatting(text_frame) -> bool: """Check if text frame has complex formatting""" for paragraph in text_frame.paragraphs: # Check for indentation (lists) if hasattr(paragraph, 'level') and paragraph.level and paragraph.level > 0: logger.debug(f"Found indented paragraph with level: {paragraph.level}") return True # Check for bullet formatting in XML if ComplexityAnalyzer._has_bullet_formatting(paragraph): logger.debug("Found bullet formatting in XML") return True # Check for multiple runs with different formatting if ComplexityAnalyzer._has_multiple_formatting_styles(paragraph): logger.debug("Found multiple formatting styles") return True return False @staticmethod def _has_bullet_formatting(paragraph) -> bool: """Check if paragraph has bullet formatting""" try: if not (hasattr(paragraph, '_element') and paragraph._element is not None): return False pPr = paragraph._element.find('.//{http://schemas.openxmlformats.org/drawingml/2006/main}pPr') if pPr is None: return False # Check for any bullet formatting elements bullet_elements = ['buFont', 'buChar', 'buAutoNum'] namespace = FormattingExtractor._get_namespace() for elem_name in bullet_elements: if pPr.find(f'.//{{{namespace}}}{elem_name}') is not None: return True except Exception as e: logger.debug(f"Could not check bullet formatting: {e}") return False @staticmethod def _has_multiple_formatting_styles(paragraph) -> bool: """Check if paragraph has multiple formatting styles""" if len(paragraph.runs) <= 1: return False colors = [] italic_states = [] for run in paragraph.runs: try: # Check colors if hasattr(run.font, 'color') and run.font.color: color = run.font.color if hasattr(color, 'type') and color.type == 1: # RGB colors.append(str(color.rgb)) elif hasattr(color, 'type') and color.type == 2: # Theme colors.append(f"theme_{color.theme_color}") # 
Check italic italic_states.append(run.font.italic if hasattr(run.font, 'italic') else None) except Exception: pass # If we have different colors or italic states return len(set(colors)) > 1 or len(set(italic_states)) > 1 class TranslationStrategy: """Handles different translation strategies""" def __init__(self, engine: TranslationEngine, text_updater: TextFrameUpdater): self.engine = engine self.text_updater = text_updater def translate_slide(self, slide, target_language: str) -> Tuple[int, bool]: """Translate a single slide using appropriate strategy""" text_items, notes_text = SlideTextCollector().collect_slide_texts(slide) translated_count = 0 notes_translated = False # Translate notes if present if notes_text: notes_translated = self._translate_notes(slide, notes_text, target_language) # Choose translation strategy if ComplexityAnalyzer.slide_has_complex_formatting(text_items): logger.info("🎨 Complex formatting detected, using individual translation") translated_count = self._translate_individually(text_items, target_language) elif len(text_items) > Config.CONTEXT_THRESHOLD: translated_count = self._translate_with_context(text_items, target_language) else: translated_count = self._translate_with_batch(text_items, target_language) return translated_count, notes_translated def _translate_notes(self, slide, notes_text: str, target_language: str) -> bool: """Translate slide notes""" try: translated_notes = self.engine.translate_text(notes_text, target_language) if translated_notes != notes_text: slide.notes_slide.notes_text_frame.text = translated_notes return True except Exception as e: logger.error(f"Error translating slide notes: {str(e)}") return False def _translate_individually(self, text_items: List[Dict], target_language: str) -> int: """Translate each text individually to preserve formatting with language-specific font""" translated_count = 0 logger.info("🎨 Using individual translation to preserve complex formatting") for i, item in enumerate(text_items): 
try: original_text = item['text'] if not original_text.strip(): continue logger.debug(f"Translating item {i+1}/{len(text_items)}: '{original_text[:50]}...'") translation = self.engine.translate_text(original_text, target_language) # Apply translation regardless of whether text changed (for font/color preservation) if self._apply_translation_to_item(item, translation, target_language): translated_count += 1 if original_text != translation: logger.debug(f"✅ Translated {item['type']}: '{original_text[:30]}...' -> '{translation[:30]}...'") else: logger.debug(f"🎨 Applied formatting to unchanged {item['type']}: '{original_text[:30]}...'") except Exception as e: logger.error(f"Individual translation failed for item {i}: {str(e)}") logger.info(f"🎯 Individual translation completed: {translated_count}/{len(text_items)} items processed") return translated_count def _translate_with_context(self, text_items: List[Dict], target_language: str) -> int: """Translate using context-aware approach with language-specific font""" if not text_items: return 0 try: translations = self.engine.translate_with_context(text_items, target_language) return self._apply_translations(text_items, translations, target_language) except Exception as e: logger.error(f"Context translation failed: {str(e)}") return self._translate_with_batch(text_items, target_language) def _translate_with_batch(self, text_items: List[Dict], target_language: str) -> int: """Translate using batch approach with language-specific font""" if not text_items: return 0 texts_to_translate = [item['text'] for item in text_items] translated_count = 0 # Process in batches for i in range(0, len(texts_to_translate), Config.BATCH_SIZE): batch_items = text_items[i:i + Config.BATCH_SIZE] batch_texts = texts_to_translate[i:i + Config.BATCH_SIZE] try: batch_translations = self.engine.translate_batch(batch_texts, target_language) translated_count += self._apply_translations(batch_items, batch_translations, target_language) except Exception 
as e: logger.error(f"Batch translation failed: {str(e)}") # Individual fallback for item in batch_items: try: translation = self.engine.translate_text(item['text'], target_language) if self._apply_translation_to_item(item, translation, target_language): translated_count += 1 except Exception: pass return translated_count def _apply_translations(self, text_items: List[Dict], translations: List[str], target_language: str = None) -> int: """Apply translations back to the original shapes with language-specific font""" if len(text_items) != len(translations): logger.error(f"Translation count mismatch: {len(text_items)} items, {len(translations)} translations") return 0 translated_count = 0 for item, translation in zip(text_items, translations): # Check if translation actually changed or if we should treat unchanged text as translated original_text = item['text'] is_actually_translated = original_text != translation # Apply translation (or preserve original with new formatting) if self._apply_translation_to_item(item, translation, target_language): translated_count += 1 # Log translation status if is_actually_translated: logger.debug(f"✅ Translated: '{original_text[:30]}...' 
-> '{translation[:30]}...'") else: logger.debug(f"🎨 Applied formatting to unchanged text: '{original_text[:30]}...'") return translated_count def _apply_translation_to_item(self, item: Dict, translation: str, target_language: str = None) -> bool: """Apply translation to a single item with language-specific font""" try: item_type = item['type'] if item_type == 'table_cell': cell = item['cell'] if hasattr(cell, 'text_frame') and cell.text_frame: self.text_updater.update_text_frame(cell.text_frame, translation, target_language) else: cell.text = translation # Apply language-specific font to table cell if target_language and hasattr(cell, 'text_frame') and cell.text_frame: try: language_font = Config.get_font_for_language(target_language) for paragraph in cell.text_frame.paragraphs: for run in paragraph.runs: run.font.name = language_font except Exception: pass return True elif item_type == 'text_frame_unified': text_frame = item['text_frame'] self.text_updater.update_text_frame(text_frame, translation, target_language) return True elif item_type == 'direct_text': item['shape'].text = translation # Apply language-specific font to direct text if target_language and hasattr(item['shape'], 'text_frame') and item['shape'].text_frame: try: language_font = Config.get_font_for_language(target_language) for paragraph in item['shape'].text_frame.paragraphs: for run in paragraph.runs: run.font.name = language_font except Exception: pass return True except Exception as e: logger.error(f"Error applying translation: {str(e)}") return False class PowerPointTranslator: """Main PowerPoint translation class""" def __init__(self, model_id: str = Config.DEFAULT_MODEL_ID, enable_polishing: bool = Config.ENABLE_POLISHING): self.model_id = model_id self.enable_polishing = enable_polishing self.config = Config() self.engine = TranslationEngine(model_id, enable_polishing) self.text_updater = TextFrameUpdater() self.strategy = TranslationStrategy(self.engine, self.text_updater) self.deps = 
DependencyManager() def translate_presentation(self, input_file: str, output_file: str, target_language: str) -> TranslationResult: """Translate entire PowerPoint presentation""" try: Presentation = self.deps.require('pptx') prs = Presentation(input_file) result = TranslationResult() total_slides = len(prs.slides) logger.info(f"🎯 Starting translation of {total_slides} slides...") logger.info(f"🎨 Translation mode: {'Natural/Polished' if self.enable_polishing else 'Literal'}") for slide_idx, slide in enumerate(prs.slides): logger.info(f"📄 Processing slide {slide_idx + 1}/{total_slides}") translated_count, notes_translated = self.strategy.translate_slide(slide, target_language) result.translated_count += translated_count if notes_translated: result.translated_notes_count += 1 result.total_shapes += len(slide.shapes) logger.info(f"✅ Slide {slide_idx + 1}: {translated_count} texts translated") # Save translated presentation prs.save(output_file) # Apply post-processing (autofit) post_processor = PostProcessor(config=self.config) post_processor.process_presentation(output_file, output_file) logger.info(f"🎉 Translation completed: {output_file}") logger.info(f"📊 Summary: {result.translated_count} texts, {result.translated_notes_count} notes") return result except Exception as e: logger.error(f"❌ Translation failed: {str(e)}") raise def translate_specific_slides(self, input_file: str, output_file: str, target_language: str, slide_numbers: List[int]) -> TranslationResult: """Translate specific slides in PowerPoint presentation""" try: Presentation = self.deps.require('pptx') prs = Presentation(input_file) result = TranslationResult() total_slides = len(prs.slides) # Validate slide numbers invalid_slides = [num for num in slide_numbers if num < 1 or num > total_slides] if invalid_slides: error_msg = f"Invalid slide numbers: {invalid_slides}. 
Valid range: 1-{total_slides}" logger.error(error_msg) result.errors.append(error_msg) return result # Remove duplicates and sort slide_numbers = sorted(list(set(slide_numbers))) logger.info(f"🎯 Starting translation of {len(slide_numbers)} specific slides: {slide_numbers}") logger.info(f"🎨 Translation mode: {'Natural/Polished' if self.enable_polishing else 'Literal'}") for slide_num in slide_numbers: slide_idx = slide_num - 1 # Convert to 0-based index slide = prs.slides[slide_idx] logger.info(f"📄 Processing slide {slide_num}/{total_slides}") translated_count, notes_translated = self.strategy.translate_slide(slide, target_language) result.translated_count += translated_count if notes_translated: result.translated_notes_count += 1 result.total_shapes += len(slide.shapes) logger.info(f"✅ Slide {slide_num}: {translated_count} texts translated") # Save translated presentation prs.save(output_file) # Apply post-processing (autofit) post_processor = PostProcessor(config=self.config) post_processor.process_presentation(output_file, output_file) logger.info(f"🎉 Translation completed: {output_file}") logger.info(f"📊 Summary: {result.translated_count} texts, {result.translated_notes_count} notes from {len(slide_numbers)} slides") return result except Exception as e: logger.error(f"❌ Translation failed: {str(e)}") raise def get_slide_count(self, input_file: str) -> int: """Get total number of slides in PowerPoint presentation""" try: Presentation = self.deps.require('pptx') prs = Presentation(input_file) return len(prs.slides) except Exception as e: logger.error(f"❌ Failed to get slide count: {str(e)}") raise def get_slide_preview(self, input_file: str, slide_number: int, max_chars: int = 200) -> str: """Get a preview of text content from a specific slide""" try: Presentation = self.deps.require('pptx') prs = Presentation(input_file) if slide_number < 1 or slide_number > len(prs.slides): raise ValueError(f"Invalid slide number: {slide_number}. 
Valid range: 1-{len(prs.slides)}") slide = prs.slides[slide_number - 1] # Convert to 0-based index text_items, notes_text = SlideTextCollector().collect_slide_texts(slide) # Collect all text content all_texts = [] for item in text_items: if item['text'].strip(): all_texts.append(item['text'].strip()) if notes_text and notes_text.strip(): all_texts.append(f"[Notes: {notes_text.strip()}]") # Join and truncate if necessary preview = " | ".join(all_texts) if len(preview) > max_chars: preview = preview[:max_chars] + "..." return preview if preview else "[No text content found]" except Exception as e: logger.error(f"❌ Failed to get slide preview: {str(e)}") raise

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/daekeun-ml/ppt-translator'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.