Skip to main content
Glama
table_operations.py120 kB
"""Table operations for Word documents.""" import re from typing import List, Optional, Dict, Any, Union from docx import Document from docx.table import Table, _Cell from docx.shared import Inches, Pt, RGBColor from docx.oxml.shared import qn, OxmlElement from docx.enum.text import WD_ALIGN_PARAGRAPH from ...models.responses import OperationResponse, ResponseStatus from ...models.tables import TableInfo, CellPosition, SearchResult, TableData, TableSearchMatch, TableSearchResult from ...models.table_analysis import ( TableStructureAnalysis, CellStyleAnalysis, TableAnalysisResult, MergeInfo, CellMergeType, analyze_cell_merge, extract_cell_formatting ) from ...models.formatting import TextFormat, CellAlignment from ...utils.exceptions import ( TableNotFoundError, InvalidTableIndexError, InvalidCellPositionError, TableOperationError, DataFormatError, DocumentNotFoundError, ) from ...utils.validation import ( validate_table_index, validate_cell_position, validate_table_data, validate_position_parameter, sanitize_string, ) from ...core.document_manager import DocumentManager from .formatting import TableFormattingOperations class TableOperations: """Handles table operations in Word documents.""" def __init__(self, document_manager: DocumentManager): """ Initialize table operations. Args: document_manager: Document manager instance """ self.document_manager = document_manager self.formatting = TableFormattingOperations(document_manager) def create_table( self, file_path: str, rows: int, cols: int, position: str = "end", paragraph_index: Optional[int] = None, headers: Optional[List[str]] = None, ) -> OperationResponse: """ Create a new table in the document. Args: file_path: Path to the document rows: Number of rows cols: Number of columns position: Where to insert the table paragraph_index: Paragraph index for 'after_paragraph' position headers: Optional header row data Returns: OperationResponse with operation result """ try: # Validate inputs if rows <= 0 or cols <= 0: return OperationResponse.error("Rows and columns must be positive integers") valid_positions = ["end", "beginning", "after_paragraph"] validate_position_parameter(position, valid_positions) if position == "after_paragraph" and paragraph_index is None: return OperationResponse.error("paragraph_index required for 'after_paragraph' position") if headers and len(headers) != cols: return OperationResponse.error(f"Headers length ({len(headers)}) must match columns ({cols})") # Get document document = self.document_manager.get_or_load_document(file_path) # Create table table = None if position == "end": table = document.add_table(rows=rows, cols=cols) elif position == "beginning": # Insert at beginning by adding after title or first paragraph if document.paragraphs: p = document.paragraphs[0] table = p.insert_paragraph_before().add_table(rows=rows, cols=cols) else: table = document.add_table(rows=rows, cols=cols) elif position == "after_paragraph": if paragraph_index < 0 or paragraph_index >= len(document.paragraphs): return OperationResponse.error(f"Invalid paragraph index: {paragraph_index}") p = document.paragraphs[paragraph_index] new_p = p.insert_paragraph_after() table = new_p._element.addnext(document.add_table(rows=rows, cols=cols)._element) table = document.tables[-1] # Get the newly added table if not table: return OperationResponse.error("Failed to create table") # Ensure the table uses Word's default inserted style (with borders) # Prefer the built-in "Table Grid" style; fall back gracefully if unavailable try: preferred_style_names = [ "Table Grid", # common English name "TableGrid", # underlying styleId often used "Normal Table" # broader fallback present in most documents ] applied = False if hasattr(document, "styles"): for style_name in preferred_style_names: try: style = document.styles[style_name] if style and getattr(style, "type", None).__str__().lower().endswith("table"): table.style = style applied = True break except Exception: continue if not applied: # Last-resort attempt: set by name; Word may resolve localized names table.style = "Table Grid" except Exception: # If styling fails, proceed without raising; Word will still render a usable table pass # Set headers if provided if headers: for col_idx, header in enumerate(headers): table.cell(0, col_idx).text = sanitize_string(header) table_index = len(document.tables) - 1 data = { "table_index": table_index, "rows": rows, "cols": cols, "position": position, "has_headers": bool(headers) } return OperationResponse.success(f"Table created with {rows} rows and {cols} columns", data) except Exception as e: return OperationResponse.error(f"Failed to create table: {str(e)}") def delete_table(self, file_path: str, table_index: int) -> OperationResponse: """ Delete a table from the document. Args: file_path: Path to the document table_index: Index of the table to delete Returns: OperationResponse with operation result """ try: document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) # Get table and remove it table = document.tables[table_index] table._element.getparent().remove(table._element) return OperationResponse.success(f"Table {table_index} deleted") except (InvalidTableIndexError, TableNotFoundError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to delete table: {str(e)}") def add_table_rows( self, file_path: str, table_index: int, count: int = 1, row_index: Optional[int] = None, copy_style_from_row: Optional[int] = None, default_text_format: Optional[TextFormat] = None, default_alignment: Optional[CellAlignment] = None, default_background_color: Optional[str] = None, ) -> OperationResponse: """ Add rows to a table with optional styling control. Args: file_path: Path to the document table_index: Index of the table count: Number of rows to add row_index: Insert position indicator. - If None: append to end - If -1: insert before the first row - If x >= 0: insert after row x copy_style_from_row: Row index to copy style from (None = no style copying) default_text_format: Default text formatting for new cells default_alignment: Default alignment for new cells default_background_color: Default background color for new cells Returns: OperationResponse with operation result """ try: if count <= 0: return OperationResponse.error("Count must be a positive integer") document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] # Validate row_index range if provided if row_index is not None: if row_index < -1 or (table.rows and row_index > len(table.rows) - 1): return OperationResponse.error(f"Invalid row_index: {row_index}") # Determine reference row for style copying reference_row = None if copy_style_from_row is not None: # Explicit row specified if copy_style_from_row < 0 or copy_style_from_row >= len(table.rows): return OperationResponse.error(f"Invalid copy_style_from_row: {copy_style_from_row}") reference_row = table.rows[copy_style_from_row] else: # Default behavior: choose a sensible reference based on insertion point if len(table.rows) > 0: if row_index is None: # Appending to end -> copy from last row reference_row = table.rows[-1] elif row_index == -1: # Inserting before first -> copy from first row reference_row = table.rows[0] else: # Inserting after row_index -> copy from that row reference_row = table.rows[row_index] # Keep track of newly added rows for styling new_rows = [] original_row_count = len(table.rows) # Add rows for i in range(count): new_row = table.add_row() # Compute target insertion index: after row_index => insert at row_index+1 if row_index is None: # Append to end: nothing to move pass else: insert_at = 0 if row_index == -1 else (row_index + 1) # Account for previously inserted rows in this batch insert_at += i # If insert_at is within current rows, move before that index if insert_at < len(table.rows) - 1: target_row = table.rows[insert_at] target_row._element.addprevious(new_row._element) new_rows.append(new_row) # Apply styling to new rows self._apply_row_styling( new_rows, reference_row, default_text_format, default_alignment, default_background_color ) data = { "table_index": table_index, "rows_added": count, "new_row_count": len(table.rows), "insert_after_row_index": row_index, } return OperationResponse.success(f"Added {count} rows to table {table_index}", data) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to add rows: {str(e)}") def add_table_columns( self, file_path: str, table_index: int, count: int = 1, column_index: Optional[int] = None, copy_style_from_column: Optional[int] = None, default_text_format: Optional[TextFormat] = None, default_alignment: Optional[CellAlignment] = None, default_background_color: Optional[str] = None, ) -> OperationResponse: """ Add columns to a table with optional styling control. Args: file_path: Path to the document table_index: Index of the table count: Number of columns to add column_index: Insert position indicator. - If None: append to end - If -1: insert before the first column - If x >= 0: insert after column x copy_style_from_column: Column index to copy style from (None = no style copying) default_text_format: Default text formatting for new cells default_alignment: Default alignment for new cells default_background_color: Default background color for new cells Returns: OperationResponse with operation result """ try: if count <= 0: return OperationResponse.error("Count must be a positive integer") document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] if not table.rows: return OperationResponse.error("Cannot add columns to empty table") original_cols = len(table.columns) # Validate column_index range if provided if column_index is not None: if column_index < -1 or column_index > original_cols - 1: return OperationResponse.error(f"Invalid column_index: {column_index}") # Determine reference column for style copying reference_column = None if copy_style_from_column is not None: # Explicit column specified if copy_style_from_column < 0 or copy_style_from_column >= original_cols: return OperationResponse.error(f"Invalid copy_style_from_column: {copy_style_from_column}") reference_column = [row.cells[copy_style_from_column] for row in table.rows] else: # Default behavior: choose a sensible reference based on insertion point if original_cols > 0: if column_index is None: # Appending to end -> copy from last column reference_column = [row.cells[-1] for row in table.rows] elif column_index == -1: # Inserting before first -> copy from first column reference_column = [row.cells[0] for row in table.rows] else: # Inserting after column_index -> copy from that column reference_column = [row.cells[column_index] for row in table.rows] # Add columns using the proper python-docx method from docx.shared import Inches for i in range(count): table.add_column(width=Inches(1)) if column_index is None: # Appending to end: nothing to move continue insert_at = 0 if column_index == -1 else (column_index + 1) insert_at += i # Move the last column to the specified position (before insert_at) if insert_at < len(table.columns) - 1: for row in table.rows: last_cell = row.cells[-1] target_cell = row.cells[insert_at] target_cell._element.addprevious(last_cell._element) # Apply styling to newly added columns if default_text_format or default_alignment or default_background_color or reference_column: self._apply_column_styling_after_add( table, original_cols, count, column_index, reference_column, default_text_format, default_alignment, default_background_color ) new_cols = len(table.columns) data = { "table_index": table_index, "columns_added": count, "new_column_count": new_cols, "insert_after_column_index": column_index, } return OperationResponse.success(f"Added {count} columns to table {table_index}", data) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to add columns: {str(e)}") def delete_table_rows( self, file_path: str, table_index: int, row_indices: List[int] ) -> OperationResponse: """ Delete rows from a table. Args: file_path: Path to the document table_index: Index of the table row_indices: List of row indices to delete Returns: OperationResponse with operation result """ try: if not row_indices: return OperationResponse.error("No row indices provided") document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] # Validate all row indices for row_idx in row_indices: validate_cell_position(row_idx, 0, len(table.rows), len(table.columns)) # Sort indices in reverse order to delete from end to beginning sorted_indices = sorted(set(row_indices), reverse=True) # Delete rows for row_idx in sorted_indices: row = table.rows[row_idx] row._element.getparent().remove(row._element) data = { "table_index": table_index, "rows_deleted": len(sorted_indices), "remaining_rows": len(table.rows) } return OperationResponse.success( f"Deleted {len(sorted_indices)} rows from table {table_index}", data ) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to delete rows: {str(e)}") def set_cell_value( self, file_path: str, table_index: int, row_index: int, column_index: int, value: str, text_format: Optional[TextFormat] = None, alignment: Optional[Dict[str, str]] = None, background_color: Optional[str] = None, borders: Optional[Dict[str, Dict[str, str]]] = None, preserve_existing_format: bool = True, # Additional formatting parameters for convenience font_family: Optional[str] = None, font_size: Optional[int] = None, font_color: Optional[str] = None, bold: Optional[bool] = None, italic: Optional[bool] = None, underline: Optional[bool] = None, horizontal_alignment: Optional[str] = None, vertical_alignment: Optional[str] = None, # Border parameters top_style: Optional[str] = None, top_width: Optional[str] = None, top_color: Optional[str] = None, bottom_style: Optional[str] = None, bottom_width: Optional[str] = None, bottom_color: Optional[str] = None, left_style: Optional[str] = None, left_width: Optional[str] = None, left_color: Optional[str] = None, right_style: Optional[str] = None, right_width: Optional[str] = None, right_color: Optional[str] = None ) -> OperationResponse: """ Set the value of a specific cell with optional formatting. Args: file_path: Path to the document table_index: Index of the table row_index: Row index column_index: Column index value: Value to set text_format: Optional text formatting (font, size, color, bold, italic, etc.) alignment: Optional alignment settings {"horizontal": "left/center/right", "vertical": "top/middle/bottom"} background_color: Optional background color as hex string (e.g., "FFFF00") borders: Optional border settings {"top/bottom/left/right": {"style": "solid", "width": "thin", "color": "000000"}} preserve_existing_format: Whether to preserve existing formatting when not specified # Convenience parameters (will override text_format, alignment, borders if provided) font_family: Optional font family (e.g., "Arial", "Times New Roman") font_size: Optional font size in points (8-72) font_color: Optional font color as hex string (e.g., "FF0000" for red) bold: Optional bold formatting italic: Optional italic formatting underline: Optional underline formatting horizontal_alignment: Optional horizontal alignment ("left", "center", "right", "justify") vertical_alignment: Optional vertical alignment ("top", "middle", "bottom") # Border convenience parameters top_style: Optional top border style ("solid", "dashed", "dotted", "double", "none") top_width: Optional top border width ("thin", "medium", "thick") top_color: Optional top border color as hex string bottom_style: Optional bottom border style bottom_width: Optional bottom border width bottom_color: Optional bottom border color as hex string left_style: Optional left border style left_width: Optional left border width left_color: Optional left border color as hex string right_style: Optional right border style right_width: Optional right border width right_color: Optional right border color as hex string Returns: OperationResponse with operation result """ try: from docx.shared import RGBColor from docx.enum.text import WD_ALIGN_PARAGRAPH # Process convenience parameters - they override the structured parameters if any([font_family, font_size, font_color, bold is not None, italic is not None, underline is not None]): text_format = TextFormat( font_family=font_family, font_size=font_size, font_color=font_color, bold=bold, italic=italic, underline=underline ) if horizontal_alignment or vertical_alignment: alignment = {} if horizontal_alignment: alignment["horizontal"] = horizontal_alignment if vertical_alignment: alignment["vertical"] = vertical_alignment if any([top_style, top_width, top_color, bottom_style, bottom_width, bottom_color, left_style, left_width, left_color, right_style, right_width, right_color]): borders = {} if any([top_style, top_width, top_color]): borders["top"] = { "style": top_style or "solid", "width": top_width or "thin", "color": top_color or "000000" } if any([bottom_style, bottom_width, bottom_color]): borders["bottom"] = { "style": bottom_style or "solid", "width": bottom_width or "thin", "color": bottom_color or "000000" } if any([left_style, left_width, left_color]): borders["left"] = { "style": left_style or "solid", "width": left_width or "thin", "color": left_color or "000000" } if any([right_style, right_width, right_color]): borders["right"] = { "style": right_style or "solid", "width": right_width or "thin", "color": right_color or "000000" } document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] validate_cell_position(row_index, column_index, len(table.rows), len(table.columns)) # Get cell and set value cell = table.cell(row_index, column_index) # Store existing formatting if preserve_existing_format is True existing_format = None if preserve_existing_format: existing_format = extract_cell_formatting(cell) # Clear existing content and set new value cell.text = sanitize_string(value) # Apply formatting if provided if cell.paragraphs: paragraph = cell.paragraphs[0] # Apply paragraph alignment if alignment and alignment.get('horizontal'): h_align = alignment['horizontal'].lower() alignment_map = { 'left': WD_ALIGN_PARAGRAPH.LEFT, 'center': WD_ALIGN_PARAGRAPH.CENTER, 'right': WD_ALIGN_PARAGRAPH.RIGHT, 'justify': WD_ALIGN_PARAGRAPH.JUSTIFY } if h_align in alignment_map: paragraph.alignment = alignment_map[h_align] elif preserve_existing_format and existing_format and existing_format.get('horizontal_alignment'): # Restore existing alignment h_align = existing_format['horizontal_alignment'] alignment_map = { 'left': WD_ALIGN_PARAGRAPH.LEFT, 'center': WD_ALIGN_PARAGRAPH.CENTER, 'right': WD_ALIGN_PARAGRAPH.RIGHT, 'justify': WD_ALIGN_PARAGRAPH.JUSTIFY } if h_align in alignment_map: paragraph.alignment = alignment_map[h_align] # Apply text formatting to runs if paragraph.runs: run = paragraph.runs[0] # Apply text formatting if preserve_existing_format and existing_format: # First restore existing text formatting if existing_format.get('font_family'): run.font.name = existing_format['font_family'] if existing_format.get('font_size'): from docx.shared import Pt run.font.size = Pt(existing_format['font_size']) if existing_format.get('font_color'): try: color_hex = existing_format['font_color'].lstrip('#') if len(color_hex) == 6: r = int(color_hex[0:2], 16) g = int(color_hex[2:4], 16) b = int(color_hex[4:6], 16) run.font.color.rgb = RGBColor(r, g, b) except (ValueError, AttributeError): pass if existing_format.get('is_bold') is not None: run.font.bold = existing_format['is_bold'] if existing_format.get('is_italic') is not None: run.font.italic = existing_format['is_italic'] if existing_format.get('is_underlined') is not None: run.font.underline = existing_format['is_underlined'] # Then apply new text formatting (overrides existing) if text_format: if text_format.font_family: run.font.name = text_format.font_family if text_format.font_size: from docx.shared import Pt run.font.size = Pt(text_format.font_size) if text_format.font_color: # Parse hex color try: color_hex = text_format.font_color.lstrip('#') if len(color_hex) == 6: r = int(color_hex[0:2], 16) g = int(color_hex[2:4], 16) b = int(color_hex[4:6], 16) run.font.color.rgb = RGBColor(r, g, b) except (ValueError, AttributeError): pass # Skip invalid color if text_format.bold is not None: run.font.bold = text_format.bold if text_format.italic is not None: run.font.italic = text_format.italic if text_format.underline is not None: run.font.underline = text_format.underline # Apply vertical alignment if provided if alignment and alignment.get('vertical'): try: from docx.oxml.shared import qn, OxmlElement v_align = alignment['vertical'].lower() alignment_map = { 'top': 'top', 'middle': 'center', 'bottom': 'bottom' } if v_align in alignment_map: tc_pr = cell._element.get_or_add_tcPr() # Remove existing vAlign if present existing_valign = tc_pr.find(qn('w:vAlign')) if existing_valign is not None: tc_pr.remove(existing_valign) # Add new vAlign valign_element = OxmlElement('w:vAlign') valign_element.set(qn('w:val'), alignment_map[v_align]) tc_pr.append(valign_element) except Exception: pass # Skip if vertical alignment application fails elif preserve_existing_format and existing_format and existing_format.get('vertical_alignment'): # Restore existing vertical alignment try: from docx.oxml.shared import qn, OxmlElement v_align = existing_format['vertical_alignment'].lower() alignment_map = { 'top': 'top', 'middle': 'center', 'bottom': 'bottom' } if v_align in alignment_map: tc_pr = cell._element.get_or_add_tcPr() # Remove existing vAlign if present existing_valign = tc_pr.find(qn('w:vAlign')) if existing_valign is not None: tc_pr.remove(existing_valign) # Add new vAlign valign_element = OxmlElement('w:vAlign') valign_element.set(qn('w:val'), alignment_map[v_align]) tc_pr.append(valign_element) except Exception: pass # Apply background color if provided if background_color: try: # Apply cell shading using proper XML construction from docx.oxml.shared import qn from docx.oxml import parse_xml tc_pr = cell._element.get_or_add_tcPr() # Remove existing shading if present existing_shd = tc_pr.find(qn('w:shd')) if existing_shd is not None: tc_pr.remove(existing_shd) # Create new shading element with proper namespace shd_xml = f'''<w:shd xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:val="clear" w:color="auto" w:fill="{background_color.lstrip('#')}"/>''' shd_element = parse_xml(shd_xml) tc_pr.append(shd_element) except Exception: pass # Skip if background color application fails elif preserve_existing_format and existing_format and existing_format.get('background_color'): # Restore existing background color try: from docx.oxml.shared import qn from docx.oxml import parse_xml tc_pr = cell._element.get_or_add_tcPr() # Remove existing shading if present existing_shd = tc_pr.find(qn('w:shd')) if existing_shd is not None: tc_pr.remove(existing_shd) # Create new shading element with proper namespace shd_xml = f'''<w:shd xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:val="clear" w:color="auto" w:fill="{existing_format["background_color"].lstrip('#')}"/>''' shd_element = parse_xml(shd_xml) tc_pr.append(shd_element) except Exception: pass # Apply borders if provided if borders: try: from docx.oxml.shared import qn from docx.oxml import parse_xml tc_pr = cell._element.get_or_add_tcPr() # Remove existing borders if present existing_borders = tc_pr.find(qn('w:tcBorders')) if existing_borders is not None: tc_pr.remove(existing_borders) # Create new borders element borders_xml = '<w:tcBorders xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">' for side, border_props in borders.items(): style = border_props.get('style', 'solid') width = border_props.get('width', 'thin') color = border_props.get('color', '000000').lstrip('#') # Map style names to Word constants style_map = { 'solid': 'single', 'dashed': 'dashed', 'dotted': 'dotted', 'double': 'double', 'none': 'none' } # Map width names to Word constants width_map = { 'thin': '4', 'medium': '12', 'thick': '24' } word_style = style_map.get(style, 'single') word_width = width_map.get(width, '4') borders_xml += f'<w:{side} w:val="{word_style}" w:sz="{word_width}" w:color="{color}"/>' borders_xml += '</w:tcBorders>' borders_element = parse_xml(borders_xml) tc_pr.append(borders_element) except Exception: pass # Skip if border application fails # Get final formatting for response final_format = extract_cell_formatting(cell) data = { "table_index": table_index, "row_index": row_index, "column_index": column_index, "value": cell.text, "applied_formatting": { "text_format": { "font_family": final_format.get('font_family'), "font_size": final_format.get('font_size'), "font_color": final_format.get('font_color'), "bold": final_format.get('is_bold', False), "italic": final_format.get('is_italic', False), "underlined": final_format.get('is_underlined', False) }, "alignment": { "horizontal": final_format.get('horizontal_alignment'), "vertical": final_format.get('vertical_alignment') }, "background_color": final_format.get('background_color') } } return OperationResponse.success( f"Cell value and formatting set at table {table_index}, row {row_index}, column {column_index}", data ) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to set cell value: {str(e)}") def set_multiple_cells( self, file_path: str, table_index: int, cells: List[Dict[str, Any]], preserve_existing_format: bool = True ) -> OperationResponse: """ Set values and formatting for multiple cells in a table. This is a batch operation that allows setting multiple cells at once, which is more efficient than calling set_cell_value multiple times. Args: file_path: Path to the document table_index: Index of the table cells: List of cell data dictionaries preserve_existing_format: Whether to preserve existing formatting when not specified Returns: OperationResponse with operation result """ try: if not cells: return OperationResponse.error("No cells provided") document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] results = [] errors = [] for i, cell_data in enumerate(cells): try: # Extract required fields row_index = cell_data.get('row_index') column_index = cell_data.get('column_index') value = cell_data.get('value', '') if row_index is None or column_index is None: errors.append(f"Cell {i}: Missing row_index or column_index") continue # Validate cell position validate_cell_position(row_index, column_index, len(table.rows), len(table.columns)) # Extract formatting parameters font_family = cell_data.get('font_family') font_size = cell_data.get('font_size') font_color = cell_data.get('font_color') bold = cell_data.get('bold') italic = cell_data.get('italic') underline = cell_data.get('underline') horizontal_alignment = cell_data.get('horizontal_alignment') vertical_alignment = cell_data.get('vertical_alignment') background_color = cell_data.get('background_color') # Extract border parameters top_style = cell_data.get('top_style') top_width = cell_data.get('top_width') top_color = cell_data.get('top_color') bottom_style = cell_data.get('bottom_style') bottom_width = cell_data.get('bottom_width') bottom_color = cell_data.get('bottom_color') left_style = cell_data.get('left_style') left_width = cell_data.get('left_width') left_color = cell_data.get('left_color') right_style = cell_data.get('right_style') right_width = cell_data.get('right_width') right_color = cell_data.get('right_color') # Build text format if any text formatting is specified text_format = None if any([font_family, font_size, font_color, bold is not None, italic is not None, underline is not None]): text_format = TextFormat( font_family=font_family, font_size=font_size, font_color=font_color, bold=bold, italic=italic, underline=underline ) # Build alignment if any alignment is specified alignment = None if horizontal_alignment or vertical_alignment: alignment = {} if horizontal_alignment: alignment["horizontal"] = horizontal_alignment if vertical_alignment: alignment["vertical"] = vertical_alignment # Build borders if any border parameters are specified borders = None if any([top_style, top_width, top_color, bottom_style, bottom_width, bottom_color, left_style, left_width, left_color, right_style, right_width, right_color]): borders = {} if any([top_style, top_width, top_color]): borders["top"] = { "style": top_style or "solid", "width": top_width or "thin", "color": top_color or "000000" } if any([bottom_style, bottom_width, bottom_color]): borders["bottom"] = { "style": bottom_style or "solid", "width": bottom_width or "thin", "color": bottom_color or "000000" } if any([left_style, left_width, left_color]): borders["left"] = { "style": left_style or "solid", "width": left_width or "thin", "color": left_color or "000000" } if any([right_style, right_width, right_color]): borders["right"] = { "style": right_style or "solid", "width": right_width or "thin", "color": right_color or "000000" } # Set the cell value using existing method cell_result = self.set_cell_value( file_path, table_index, row_index, column_index, value, text_format=text_format, alignment=alignment, background_color=background_color, borders=borders, preserve_existing_format=preserve_existing_format ) if cell_result.status == ResponseStatus.SUCCESS: results.append({ "row_index": row_index, "column_index": column_index, "value": value, "status": "success" }) else: errors.append(f"Cell {i} (row {row_index}, col {column_index}): {cell_result.message}") except (InvalidTableIndexError, InvalidCellPositionError) as e: errors.append(f"Cell {i}: {str(e)}") except Exception as e: errors.append(f"Cell {i}: {str(e)}") # Prepare response data data = { "table_index": table_index, "total_cells": len(cells), "successful_cells": len(results), "failed_cells": len(errors), "results": results, "errors": errors } if errors: if results: return OperationResponse.success( f"Batch operation completed with {len(results)} successes and {len(errors)} failures", data ) else: return OperationResponse.error( f"All {len(cells)} cells failed to update", data ) else: return OperationResponse.success( f"Successfully updated {len(results)} cells", data ) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to set multiple cells: {str(e)}") def get_cell_value( self, file_path: str, table_index: int, row_index: int, column_index: int, include_formatting: bool = True ) -> OperationResponse: """ Get the value and formatting of a specific cell. Args: file_path: Path to the document table_index: Index of the table row_index: Row index column_index: Column index include_formatting: Whether to include detailed formatting information Returns: OperationResponse with cell value and formatting """ try: document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] validate_cell_position(row_index, column_index, len(table.rows), len(table.columns)) # Get cell and its value cell = table.cell(row_index, column_index) value = cell.text data = { "table_index": table_index, "row_index": row_index, "column_index": column_index, "value": value, "is_empty": not value.strip() } # Include formatting information if requested if include_formatting: cell_format = extract_cell_formatting(cell) merge_info = analyze_cell_merge(cell, row_index, column_index) data["formatting"] = { "text_format": { "font_family": cell_format.get('font_family'), "font_size": cell_format.get('font_size'), "font_color": cell_format.get('font_color'), "bold": cell_format.get('is_bold', False), "italic": cell_format.get('is_italic', False), "underlined": cell_format.get('is_underlined', False), "strikethrough": cell_format.get('is_strikethrough', False) }, "alignment": { "horizontal": cell_format.get('horizontal_alignment'), "vertical": cell_format.get('vertical_alignment') }, "background_color": cell_format.get('background_color'), "borders": self._extract_border_data(cell_format.get('borders', {})) } # Include merge information if cell is merged if merge_info: data["merge_info"] = { "type": merge_info.merge_type.value, "start_row": merge_info.start_row, "end_row": merge_info.end_row, "start_col": merge_info.start_col, "end_col": merge_info.end_col, "span_rows": merge_info.span_rows, "span_cols": merge_info.span_cols } else: data["merge_info"] = None message = "Cell value retrieved" if include_formatting: message += " with formatting" return OperationResponse.success(message, data) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to get cell value: {str(e)}") def get_table_data_and_structure( self, file_path: str, table_index: int, start_row: int = 0, end_row: Optional[int] = None, start_col: int = 0, end_col: Optional[int] = None, include_headers: bool = True, format_type: str = "array", ) -> OperationResponse: """ Get table data and structure information within specified range. This interface returns table content, merge information, and basic structure without detailed cell formatting to keep response size manageable. Args: file_path: Path to the document table_index: Index of the table start_row: Starting row index (0-based, inclusive) end_row: Ending row index (0-based, exclusive). None means to the end start_col: Starting column index (0-based, inclusive) end_col: Ending column index (0-based, exclusive). None means to the end include_headers: Whether to include headers format_type: Format of returned data ('array', 'object', 'csv') Returns: OperationResponse with table data and structure """ try: valid_formats = ["array", "object", "csv"] if format_type not in valid_formats: return OperationResponse.error(f"Invalid format. Valid options: {', '.join(valid_formats)}") document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] if not table.rows: return OperationResponse.success("Table is empty", { "table_index": table_index, "format": format_type, "rows": 0, "columns": 0, "has_headers": False, "headers": None, "data": [], "merge_regions": [], "range_info": { "total_rows": 0, "total_columns": 0, "requested_range": { "start_row": start_row, "end_row": end_row, "start_col": start_col, "end_col": end_col }, "rows_returned": 0, "columns_returned": 0, "has_more_rows": False, "has_more_cols": False } }) total_rows = len(table.rows) total_cols = len(table.columns) if table.rows else 0 # Validate and adjust row range if start_row < 0: start_row = 0 if start_row >= total_rows: return OperationResponse.error(f"start_row ({start_row}) exceeds table rows ({total_rows})") if end_row is None: end_row = total_rows elif end_row > total_rows: end_row = total_rows elif end_row <= start_row: return OperationResponse.error("end_row must be greater than start_row") # Validate and adjust column range if start_col < 0: start_col = 0 if start_col >= total_cols: return OperationResponse.error(f"start_col ({start_col}) exceeds table columns ({total_cols})") if end_col is None: end_col = total_cols elif end_col > total_cols: end_col = total_cols elif end_col <= start_col: return OperationResponse.error("end_col must be greater than start_col") # Extract headers if requested headers = None data_start_row = start_row if include_headers and table.rows and start_row == 0: header_cells = [cell.text for cell in table.rows[0].cells[start_col:end_col]] # Only treat as headers if at least one cell has non-empty content if any(cell.strip() for cell in header_cells): headers = header_cells data_start_row = 1 # Adjust end_row if we're including headers if end_row > 1: end_row = max(1, end_row) # Extract data rows data = [] merge_regions = [] for row_idx in range(data_start_row, end_row): row = table.rows[row_idx] row_data = [] for col_idx in range(start_col, end_col): if col_idx < len(row.cells): cell = row.cells[col_idx] cell_text = cell.text row_data.append(cell_text) # Check for merge information merge_info = analyze_cell_merge(cell, row_idx, col_idx) if merge_info and merge_info.merge_type != CellMergeType.NONE: # Adjust merge coordinates to relative range relative_merge = { "type": merge_info.merge_type.value, "start_row": max(0, merge_info.start_row - start_row), "end_row": min(end_row - start_row - 1, merge_info.end_row - start_row), "start_col": max(0, merge_info.start_col - start_col), "end_col": min(end_col - start_col - 1, merge_info.end_col - start_col), "span_rows": merge_info.span_rows, "span_cols": merge_info.span_cols, "absolute_position": { "start_row": merge_info.start_row, "end_row": merge_info.end_row, "start_col": merge_info.start_col, "end_col": merge_info.end_col } } merge_regions.append(relative_merge) else: row_data.append("") data.append(row_data) # Format data according to requested format if format_type == "array": result_data = data if include_headers and headers and start_row == 0: result_data = [headers] + data elif format_type == "object": if headers: result_data = [] for row in data: row_dict = {} for i, value in enumerate(row): header = headers[i] if i < len(headers) else f"Column_{i + start_col}" row_dict[header] = value result_data.append(row_dict) else: result_data = [{"Column_" + str(i + start_col): value for i, value in enumerate(row)} for row in data] elif format_type == "csv": result_data = [] if include_headers and headers and start_row == 0: result_data.append(headers) result_data.extend(data) # Calculate range info rows_returned = len(data) if include_headers and headers and start_row == 0: rows_returned += 1 range_info = { "total_rows": total_rows, "total_columns": total_cols, "requested_range": { "start_row": start_row, "end_row": end_row, "start_col": start_col, "end_col": end_col }, "rows_returned": rows_returned, "columns_returned": end_col - start_col, "has_more_rows": end_row < total_rows, "has_more_cols": end_col < total_cols } response_data = { "table_index": table_index, "format": format_type, "rows": len(data), "columns": len(data[0]) if data else 0, "has_headers": bool(headers), "headers": headers, "data": result_data, "merge_regions": merge_regions, "range_info": range_info } return OperationResponse.success( f"Table data and structure retrieved in {format_type} format (rows {start_row}-{end_row-1}, cols {start_col}-{end_col-1})", response_data ) except (InvalidTableIndexError,) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to get table data and structure: {str(e)}") def get_table_styles( self, file_path: str, table_index: int, start_row: int = 0, end_row: Optional[int] = None, start_col: int = 0, end_col: Optional[int] = None, ) -> OperationResponse: """ Get table cell styles and formatting information within specified range. This interface returns detailed cell formatting information including fonts, colors, alignment, borders, and background colors. Args: file_path: Path to the document table_index: Index of the table start_row: Starting row index (0-based, inclusive) end_row: Ending row index (0-based, exclusive). None means to the end start_col: Starting column index (0-based, inclusive) end_col: Ending column index (0-based, exclusive). None means to the end Returns: OperationResponse with table cell styles """ try: document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] if not table.rows: return OperationResponse.success("Table is empty", {"styles": []}) total_rows = len(table.rows) total_cols = len(table.columns) if table.rows else 0 # Validate and adjust row range if start_row < 0: start_row = 0 if start_row >= total_rows: return OperationResponse.error(f"start_row ({start_row}) exceeds table rows ({total_rows})") if end_row is None: end_row = total_rows elif end_row > total_rows: end_row = total_rows elif end_row <= start_row: return OperationResponse.error("end_row must be greater than start_row") # Validate and adjust column range if start_col < 0: start_col = 0 if start_col >= total_cols: return OperationResponse.error(f"start_col ({start_col}) exceeds table columns ({total_cols})") if end_col is None: end_col = total_cols elif end_col > total_cols: end_col = total_cols elif end_col <= start_col: return OperationResponse.error("end_col must be greater than start_col") # Extract cell styles cell_styles = [] style_summary = { "font_families": set(), "font_sizes": set(), "colors": set(), "background_colors": set(), "alignments": set(), "border_styles": set() } for row_idx in range(start_row, end_row): row = table.rows[row_idx] row_styles = [] for col_idx in range(start_col, end_col): if col_idx < len(row.cells): cell = row.cells[col_idx] # Extract comprehensive formatting formatting = extract_cell_formatting(cell) merge_info = analyze_cell_merge(cell, row_idx, col_idx) # Build cell style object cell_style = { "position": { "row": row_idx, "column": col_idx, "relative_row": row_idx - start_row, "relative_column": col_idx - start_col }, "text_format": { "font_family": formatting["font_family"], "font_size": formatting["font_size"], "font_color": formatting["font_color"], "bold": formatting["is_bold"], "italic": formatting["is_italic"], "underlined": formatting["is_underlined"], "strikethrough": formatting["is_strikethrough"] }, "alignment": { "horizontal": formatting["horizontal_alignment"], "vertical": formatting["vertical_alignment"] }, "background": { "color": formatting["background_color"] }, "borders": { "top": formatting["borders"]["top"], "bottom": formatting["borders"]["bottom"], "left": formatting["borders"]["left"], "right": formatting["borders"]["right"] } } # Add merge information if present if merge_info and merge_info.merge_type != CellMergeType.NONE: cell_style["merge"] = { "type": merge_info.merge_type.value, "start_row": merge_info.start_row, "end_row": merge_info.end_row, "start_col": merge_info.start_col, "end_col": merge_info.end_col, "span_rows": merge_info.span_rows, "span_cols": merge_info.span_cols } else: cell_style["merge"] = None row_styles.append(cell_style) # Update style summary if formatting["font_family"]: style_summary["font_families"].add(formatting["font_family"]) if formatting["font_size"]: style_summary["font_sizes"].add(formatting["font_size"]) if formatting["font_color"]: style_summary["colors"].add(formatting["font_color"]) if formatting["background_color"]: style_summary["background_colors"].add(formatting["background_color"]) if formatting["horizontal_alignment"]: style_summary["alignments"].add(formatting["horizontal_alignment"]) # Track border styles for border_side, border_info in formatting["borders"].items(): if border_info and border_info.get("style"): style_summary["border_styles"].add(border_info["style"]) else: # Empty cell placeholder row_styles.append({ "position": { "row": row_idx, "column": col_idx, "relative_row": row_idx - start_row, "relative_column": col_idx - start_col }, "text_format": None, "alignment": None, "background": None, "borders": None, "merge": None }) cell_styles.append(row_styles) # Convert sets to lists for JSON serialization style_summary = { "font_families": list(style_summary["font_families"]), "font_sizes": list(style_summary["font_sizes"]), "colors": list(style_summary["colors"]), "background_colors": list(style_summary["background_colors"]), "alignments": list(style_summary["alignments"]), "border_styles": list(style_summary["border_styles"]) } # Calculate range info range_info = { "total_rows": total_rows, "total_columns": total_cols, "requested_range": { "start_row": start_row, "end_row": end_row, "start_col": start_col, "end_col": end_col }, "rows_returned": end_row - start_row, "columns_returned": end_col - start_col, "has_more_rows": end_row < total_rows, "has_more_cols": end_col < total_cols } response_data = { "table_index": table_index, "cell_styles": cell_styles, "style_summary": style_summary, "range_info": range_info } return OperationResponse.success( f"Table styles retrieved (rows {start_row}-{end_row-1}, cols {start_col}-{end_col-1})", response_data ) except (InvalidTableIndexError,) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to get table styles: {str(e)}") def list_tables(self, file_path: str, include_summary: bool = True) -> OperationResponse: """ List all tables in the document. Args: file_path: Path to the document include_summary: Whether to include table summary information Returns: OperationResponse with list of tables """ try: document = self.document_manager.get_or_load_document(file_path) tables = [] for i, table in enumerate(document.tables): table_info = { "index": i, "rows": len(table.rows), "columns": len(table.columns) if table.rows else 0, } if include_summary: # Check if has headers (simple heuristic) has_headers = False if table.rows: first_row_has_text = all(cell.text.strip() for cell in table.rows[0].cells) has_headers = first_row_has_text table_info.update({ "has_headers": has_headers, "style": getattr(table.style, 'name', None) if table.style else None, "first_row_data": [cell.text for cell in table.rows[0].cells] if table.rows else [] }) tables.append(table_info) data = { "tables": tables, "total_count": len(tables) } return OperationResponse.success(f"Found {len(tables)} tables", data) except Exception as e: return OperationResponse.error(f"Failed to list tables: {str(e)}") def search_table_content( self, file_path: str, query: str, search_mode: str = "contains", case_sensitive: bool = False, table_indices: Optional[List[int]] = None, max_results: Optional[int] = None ) -> OperationResponse: """ Search for content within table cells. Args: file_path: Path to the document query: Search query string search_mode: Search mode ("exact", "contains", "regex") case_sensitive: Whether search is case sensitive table_indices: Optional list of table indices to search (None = all tables) max_results: Maximum number of results to return (None = no limit) Returns: OperationResponse with search results """ try: if not query.strip(): return OperationResponse.error("Search query cannot be empty") valid_modes = ["exact", "contains", "regex"] if search_mode not in valid_modes: return OperationResponse.error(f"Invalid search mode. Valid options: {', '.join(valid_modes)}") document = self.document_manager.get_or_load_document(file_path) # Determine which tables to search if table_indices is None: tables_to_search = list(range(len(document.tables))) else: # Validate table indices for idx in table_indices: validate_table_index(idx, len(document.tables)) tables_to_search = table_indices matches = [] summary = { "tables_with_matches": 0, "matches_per_table": {}, "total_cells_searched": 0 } # Compile regex pattern if needed pattern = None if search_mode == "regex": try: flags = 0 if case_sensitive else re.IGNORECASE pattern = re.compile(query, flags) except re.error as e: return OperationResponse.error(f"Invalid regex pattern: {str(e)}") # Search each table for table_idx in tables_to_search: table = document.tables[table_idx] table_matches = 0 for row_idx, row in enumerate(table.rows): for col_idx, cell in enumerate(row.cells): cell_text = cell.text summary["total_cells_searched"] += 1 # Perform search based on mode cell_matches = self._search_cell_content( cell_text, query, search_mode, case_sensitive, pattern ) # Create match objects for match_info in cell_matches: if max_results and len(matches) >= max_results: break match = TableSearchMatch( table_index=table_idx, row_index=row_idx, column_index=col_idx, cell_value=cell_text, match_text=match_info["text"], match_start=match_info["start"], match_end=match_info["end"] ) matches.append(match) table_matches += 1 if max_results and len(matches) >= max_results: break if max_results and len(matches) >= max_results: break if table_matches > 0: summary["tables_with_matches"] += 1 summary["matches_per_table"][table_idx] = table_matches # Create search result search_result = TableSearchResult( query=query, search_mode=search_mode, case_sensitive=case_sensitive, matches=matches, total_matches=len(matches), tables_searched=tables_to_search, summary=summary ) message = f"Found {len(matches)} matches in {summary['tables_with_matches']} tables" if max_results and len(matches) >= max_results: message += f" (limited to {max_results} results)" return OperationResponse.success(message, search_result.to_dict()) except (InvalidTableIndexError,) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to search table content: {str(e)}") def _search_cell_content( self, cell_text: str, query: str, search_mode: str, case_sensitive: bool, pattern: Optional[re.Pattern] = None ) -> List[Dict[str, Any]]: """ Search for matches within a single cell's content. Args: cell_text: The cell's text content query: Search query search_mode: Search mode case_sensitive: Case sensitivity flag pattern: Compiled regex pattern (for regex mode) Returns: List of match information dictionaries """ matches = [] if not cell_text: return matches if search_mode == "exact": # Exact match search_text = cell_text if case_sensitive else cell_text.lower() query_text = query if case_sensitive else query.lower() if search_text == query_text: matches.append({ "text": cell_text, "start": 0, "end": len(cell_text) }) elif search_mode == "contains": # Contains match search_text = cell_text if case_sensitive else cell_text.lower() query_text = query if case_sensitive else query.lower() start = 0 while True: pos = search_text.find(query_text, start) if pos == -1: break matches.append({ "text": cell_text[pos:pos + len(query)], "start": pos, "end": pos + len(query) }) start = pos + 1 elif search_mode == "regex": # Regex match if pattern: for match in pattern.finditer(cell_text): matches.append({ "text": match.group(), "start": match.start(), "end": match.end() }) return matches def search_table_headers( self, file_path: str, query: str, search_mode: str = "contains", case_sensitive: bool = False ) -> OperationResponse: """ Search specifically in table headers (first row of each table). Args: file_path: Path to the document query: Search query string search_mode: Search mode ("exact", "contains", "regex") case_sensitive: Whether search is case sensitive Returns: OperationResponse with search results """ try: if not query.strip(): return OperationResponse.error("Search query cannot be empty") document = self.document_manager.get_or_load_document(file_path) matches = [] tables_with_headers = 0 # Search only first row of each table for table_idx, table in enumerate(document.tables): if not table.rows: continue first_row = table.rows[0] has_header_matches = False for col_idx, cell in enumerate(first_row.cells): cell_text = cell.text # Use the same search logic as general search pattern = None if search_mode == "regex": try: flags = 0 if case_sensitive else re.IGNORECASE pattern = re.compile(query, flags) except re.error as e: return OperationResponse.error(f"Invalid regex pattern: {str(e)}") cell_matches = self._search_cell_content( cell_text, query, search_mode, case_sensitive, pattern ) for match_info in cell_matches: match = TableSearchMatch( table_index=table_idx, row_index=0, # Always first row for headers column_index=col_idx, cell_value=cell_text, match_text=match_info["text"], match_start=match_info["start"], match_end=match_info["end"] ) matches.append(match) has_header_matches = True if has_header_matches: tables_with_headers += 1 # Create search result search_result = TableSearchResult( query=query, search_mode=search_mode, case_sensitive=case_sensitive, matches=matches, total_matches=len(matches), tables_searched=list(range(len(document.tables))), summary={ "search_type": "headers_only", "tables_with_header_matches": tables_with_headers, "total_tables": len(document.tables) } ) message = f"Found {len(matches)} header matches in {tables_with_headers} tables" return OperationResponse.success(message, search_result.to_dict()) except Exception as e: return OperationResponse.error(f"Failed to search table headers: {str(e)}") def analyze_table_structure( self, file_path: str, table_index: int, include_cell_details: bool = True ) -> OperationResponse: """ Analyze the complete structure and styling of a specific table. Args: file_path: Path to the document table_index: Index of the table to analyze include_cell_details: Whether to include detailed cell analysis Returns: OperationResponse with comprehensive table analysis """ try: document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] # Basic table information total_rows = len(table.rows) total_columns = len(table.columns) if table.rows else 0 # Table-level properties table_style_name = getattr(table.style, 'name', None) if table.style else None # Header detection has_header_row = False header_row_index = None header_cells = None if table.rows: # Simple heuristic: if first row has text in all cells, consider it header first_row = table.rows[0] first_row_texts = [cell.text.strip() for cell in first_row.cells] has_header_row = all(text for text in first_row_texts) if has_header_row: header_row_index = 0 header_cells = first_row_texts # Initialize cell analysis storage cells = [] merge_regions = [] merged_cells_count = 0 # Style tracking for consistency analysis font_families = set() font_sizes = set() colors = set() background_colors = set() alignments = set() border_styles = set() # Analyze each cell for row_idx, row in enumerate(table.rows): cell_row = [] for col_idx, cell in enumerate(row.cells): # Extract cell content text_content = cell.text is_empty = not text_content.strip() # Analyze merge information merge_info = analyze_cell_merge(cell, row_idx, col_idx) if merge_info: merge_regions.append(merge_info) merged_cells_count += 1 # Extract formatting if detailed analysis is requested cell_analysis = None if include_cell_details: formatting = extract_cell_formatting(cell) # Track unique styles if formatting["font_family"]: font_families.add(formatting["font_family"]) if formatting["font_size"]: font_sizes.add(formatting["font_size"]) if formatting["font_color"]: colors.add(formatting["font_color"]) if formatting["background_color"]: background_colors.add(formatting["background_color"]) if formatting["horizontal_alignment"]: alignments.add(formatting["horizontal_alignment"]) # Track border styles for border_side, border_info in formatting["borders"].items(): if border_info and border_info.get("style"): border_styles.add(border_info["style"]) cell_analysis = CellStyleAnalysis( row_index=row_idx, column_index=col_idx, text_content=text_content, is_empty=is_empty, merge_info=merge_info, font_family=formatting["font_family"], font_size=formatting["font_size"], font_color=formatting["font_color"], is_bold=formatting["is_bold"], is_italic=formatting["is_italic"], is_underlined=formatting["is_underlined"], is_strikethrough=formatting["is_strikethrough"], horizontal_alignment=formatting["horizontal_alignment"], vertical_alignment=formatting["vertical_alignment"], background_color=formatting["background_color"], top_border=formatting["borders"]["top"], bottom_border=formatting["borders"]["bottom"], left_border=formatting["borders"]["left"], right_border=formatting["borders"]["right"], width=None, # Could be implemented if needed height=None # Could be implemented if needed ) else: # Minimal cell analysis without formatting details cell_analysis = CellStyleAnalysis( row_index=row_idx, column_index=col_idx, text_content=text_content, is_empty=is_empty, merge_info=merge_info, font_family=None, font_size=None, font_color=None, is_bold=False, is_italic=False, is_underlined=False, is_strikethrough=False, horizontal_alignment=None, vertical_alignment=None, background_color=None, top_border=None, bottom_border=None, left_border=None, right_border=None, width=None, height=None ) cell_row.append(cell_analysis) cells.append(cell_row) # Style consistency analysis consistent_fonts = len(font_families) <= 1 consistent_alignment = len(alignments) <= 1 consistent_borders = len(border_styles) <= 1 # Create table structure analysis table_analysis = TableStructureAnalysis( table_index=table_index, total_rows=total_rows, total_columns=total_columns, table_style_name=table_style_name, table_alignment=None, # Could be implemented if needed table_width=None, # Could be implemented if needed has_header_row=has_header_row, header_row_index=header_row_index, header_cells=header_cells, cells=cells, merged_cells_count=merged_cells_count, merge_regions=merge_regions, consistent_fonts=consistent_fonts, consistent_alignment=consistent_alignment, consistent_borders=consistent_borders, unique_font_families=list(font_families), unique_font_sizes=list(font_sizes), unique_colors=list(colors), unique_background_colors=list(background_colors) ) return OperationResponse.success( f"Table {table_index} structure analyzed successfully", table_analysis.to_dict() ) except DocumentNotFoundError: return OperationResponse.error("Document not loaded") except (InvalidTableIndexError,) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to analyze table structure: {str(e)}") def analyze_all_tables( self, file_path: str, include_cell_details: bool = True ) -> OperationResponse: """ Analyze the structure and styling of all tables in the document. Args: file_path: Path to the document include_cell_details: Whether to include detailed cell analysis Returns: OperationResponse with analysis of all tables """ try: from datetime import datetime document = self.document_manager.get_or_load_document(file_path) if not document.tables: return OperationResponse.success( "No tables found in document", {"file_path": file_path, "total_tables": 0, "tables": []} ) table_analyses = [] # Analyze each table for table_idx in range(len(document.tables)): analysis_response = self.analyze_table_structure( file_path, table_idx, include_cell_details ) if analysis_response.success: # Extract the table analysis from the response data table_data = analysis_response.data table_analyses.append(TableStructureAnalysis( table_index=table_data["table_info"]["index"], total_rows=table_data["table_info"]["rows"], total_columns=table_data["table_info"]["columns"], table_style_name=table_data["table_info"]["style_name"], table_alignment=table_data["table_info"]["alignment"], table_width=table_data["table_info"]["width"], has_header_row=table_data["header_info"]["has_header"], header_row_index=table_data["header_info"]["header_row_index"], header_cells=table_data["header_info"]["header_cells"], cells=[], # We'll populate this if needed merged_cells_count=table_data["merge_analysis"]["merged_cells_count"], merge_regions=[], # We'll populate this if needed consistent_fonts=table_data["style_consistency"]["fonts"], consistent_alignment=table_data["style_consistency"]["alignment"], consistent_borders=table_data["style_consistency"]["borders"], unique_font_families=table_data["style_summary"]["font_families"], unique_font_sizes=table_data["style_summary"]["font_sizes"], unique_colors=table_data["style_summary"]["colors"], unique_background_colors=table_data["style_summary"]["background_colors"] )) else: # If individual table analysis fails, log it but continue continue # Create comprehensive analysis result analysis_result = TableAnalysisResult( file_path=file_path, total_tables=len(table_analyses), analysis_timestamp=datetime.now().isoformat(), tables=table_analyses ) return OperationResponse.success( f"Analyzed {len(table_analyses)} tables successfully", analysis_result.to_dict() ) except DocumentNotFoundError: return OperationResponse.error("Document not loaded") except Exception as e: return OperationResponse.error(f"Failed to analyze all tables: {str(e)}") def _apply_row_styling( self, new_rows, reference_row, default_text_format, default_alignment, default_background_color ): """ Apply styling to newly added rows. Args: new_rows: List of newly created row objects reference_row: Row to copy style from (if provided) default_text_format: Default text formatting default_alignment: Default alignment default_background_color: Default background color """ for new_row in new_rows: # Apply styling to each cell in the new row for col_idx, new_cell in enumerate(new_row.cells): # Determine reference cell for style copying reference_cell = None if reference_row and col_idx < len(reference_row.cells): reference_cell = reference_row.cells[col_idx] # Copy style from reference cell if available if reference_cell: self._copy_cell_style(new_cell, reference_cell) # Apply default formatting if no reference or to override if default_text_format or default_alignment or default_background_color: self._apply_default_cell_formatting( new_cell, default_text_format, default_alignment, default_background_color ) def _copy_cell_style(self, target_cell, source_cell): """ Copy all styling from source cell to target cell. Args: target_cell: Cell to apply styling to source_cell: Cell to copy styling from """ try: # Copy paragraph formatting for target_para, source_para in zip(target_cell.paragraphs, source_cell.paragraphs): # Copy paragraph alignment target_para.alignment = source_para.alignment # Copy run formatting if source_para.runs: # Clear existing runs in target for run in target_para.runs: run._element.getparent().remove(run._element) # Copy runs from source for source_run in source_para.runs: new_run = target_para.add_run("") # Copy font properties if source_run.font.name: new_run.font.name = source_run.font.name if source_run.font.size: new_run.font.size = source_run.font.size if source_run.font.color.rgb: new_run.font.color.rgb = source_run.font.color.rgb new_run.bold = source_run.bold new_run.italic = source_run.italic new_run.underline = source_run.underline # Copy cell background color try: source_shading = source_cell._element.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}shd') if source_shading is not None: target_shading = target_cell._element.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}shd') if target_shading is None: # Create shading element target_shading = OxmlElement('w:shd') target_cell._element.get_or_add_tcPr().append(target_shading) # Copy fill attribute if source_shading.get(qn('w:fill')): target_shading.set(qn('w:fill'), source_shading.get(qn('w:fill'))) except Exception: pass # Ignore background color copy errors # Copy cell vertical alignment try: source_valign = source_cell._element.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}vAlign') if source_valign is not None: target_valign = target_cell._element.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}vAlign') if target_valign is None: target_valign = OxmlElement('w:vAlign') target_cell._element.get_or_add_tcPr().append(target_valign) target_valign.set(qn('w:val'), source_valign.get(qn('w:val'))) except Exception: pass # Ignore vertical alignment copy errors # Copy cell borders try: ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' source_borders = source_cell._element.find(f'.//{{{ns}}}tcBorders') if source_borders is not None: # Get or create target tcPr target_tcPr = target_cell._element.get_or_add_tcPr() # Remove existing borders existing_borders = target_tcPr.find(f'.//{{{ns}}}tcBorders') if existing_borders is not None: target_tcPr.remove(existing_borders) # Clone the entire tcBorders element import copy new_borders = copy.deepcopy(source_borders) target_tcPr.append(new_borders) except Exception: pass # Ignore border copy errors except Exception as e: # If copying fails, just continue - better to have unstyled cells than no cells pass def _apply_default_cell_formatting( self, cell, text_format, alignment, background_color ): """ Apply default formatting to a cell. Args: cell: Cell to format text_format: TextFormat object with formatting alignment: CellAlignment object with alignment background_color: Background color as hex string """ try: # Apply text formatting if text_format: for paragraph in cell.paragraphs: if not paragraph.runs: paragraph.add_run("") for run in paragraph.runs: if text_format.font_family: run.font.name = text_format.font_family if text_format.font_size: run.font.size = Pt(text_format.font_size) if text_format.font_color: # Parse hex color try: color_hex = text_format.font_color.lstrip('#') if len(color_hex) == 6: r = int(color_hex[0:2], 16) g = int(color_hex[2:4], 16) b = int(color_hex[4:6], 16) run.font.color.rgb = RGBColor(r, g, b) except Exception: pass if text_format.bold is not None: run.bold = text_format.bold if text_format.italic is not None: run.italic = text_format.italic if text_format.underline is not None: run.underline = text_format.underline # Apply alignment if alignment: for paragraph in cell.paragraphs: if alignment.horizontal: alignment_map = { 'left': WD_ALIGN_PARAGRAPH.LEFT, 'center': WD_ALIGN_PARAGRAPH.CENTER, 'right': WD_ALIGN_PARAGRAPH.RIGHT, 'justify': WD_ALIGN_PARAGRAPH.JUSTIFY } if alignment.horizontal.lower() in alignment_map: paragraph.alignment = alignment_map[alignment.horizontal.lower()] # Apply vertical alignment if alignment.vertical: try: v_align = alignment.vertical.lower() if v_align == 'middle': v_align = 'center' valign_element = cell._element.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}vAlign') if valign_element is None: valign_element = OxmlElement('w:vAlign') cell._element.get_or_add_tcPr().append(valign_element) valign_element.set(qn('w:val'), v_align) except Exception: pass # Apply background color if background_color: try: color_hex = background_color.lstrip('#').upper() if len(color_hex) == 6: shading = cell._element.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}shd') if shading is None: shading = OxmlElement('w:shd') cell._element.get_or_add_tcPr().append(shading) shading.set(qn('w:fill'), color_hex) except Exception: pass except Exception as e: # If formatting fails, continue - better to have unformatted cells than no cells pass def _extract_border_data(self, borders_dict: dict) -> dict: """ Extract border data from extract_cell_formatting format to expected format. Args: borders_dict: Border data from extract_cell_formatting Returns: Border data in expected format for get_cell_value """ result = {} for side in ['top', 'bottom', 'left', 'right']: border_info = borders_dict.get(side, {}) if border_info: result[f'{side}_style'] = border_info.get('style') result[f'{side}_width'] = border_info.get('width') result[f'{side}_color'] = border_info.get('color') else: result[f'{side}_style'] = None result[f'{side}_width'] = None result[f'{side}_color'] = None return result def merge_cells( self, file_path: str, table_index: int, start_row: int, start_col: int, end_row: int, end_col: int ) -> OperationResponse: """ Merge cells in a table to create a merged cell region. Args: file_path: Path to the document table_index: Index of the table start_row: Starting row index (top-left corner) start_col: Starting column index (top-left corner) end_row: Ending row index (bottom-right corner) end_col: Ending column index (bottom-right corner) Returns: OperationResponse with merge result """ try: document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] # Validate cell positions validate_cell_position(start_row, start_col, len(table.rows), len(table.columns)) validate_cell_position(end_row, end_col, len(table.rows), len(table.columns)) # Validate merge range if start_row > end_row or start_col > end_col: return OperationResponse.error("Invalid merge range: start position must be before end position") if start_row == end_row and start_col == end_col: return OperationResponse.error("Cannot merge a single cell with itself") # Check if any cells in the range are already merged for row_idx in range(start_row, end_row + 1): for col_idx in range(start_col, end_col + 1): cell = table.cell(row_idx, col_idx) merge_info = analyze_cell_merge(cell, row_idx, col_idx) if merge_info and merge_info.merge_type != CellMergeType.NONE: return OperationResponse.error( f"Cell at row {row_idx}, col {col_idx} is already merged. " "Cannot merge cells that are part of existing merged regions." ) # Get the top-left cell (this will be the merged cell) merged_cell = table.cell(start_row, start_col) # Store content from all cells in the range all_content = [] for row_idx in range(start_row, end_row + 1): for col_idx in range(start_col, end_col + 1): cell = table.cell(row_idx, col_idx) if cell.text.strip(): all_content.append(cell.text.strip()) # Combine all content (separated by spaces) combined_content = " ".join(all_content) # Perform the merge using python-docx's merge method merged_cell = table.cell(start_row, start_col) merged_cell.merge(table.cell(end_row, end_col)) # Set the combined content merged_cell.text = combined_content # Calculate merge dimensions span_rows = end_row - start_row + 1 span_cols = end_col - start_col + 1 data = { "table_index": table_index, "start_row": start_row, "start_col": start_col, "end_row": end_row, "end_col": end_col, "span_rows": span_rows, "span_cols": span_cols, "merged_content": combined_content, "cells_merged": span_rows * span_cols } return OperationResponse.success( f"Successfully merged {span_rows}x{span_cols} cells starting at row {start_row}, col {start_col}", data ) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to merge cells: {str(e)}") def unmerge_cells( self, file_path: str, table_index: int, row: int, column: int ) -> OperationResponse: """ Unmerge a merged cell region, splitting it back into individual cells. Args: file_path: Path to the document table_index: Index of the table row: Row index of any cell in the merged region column: Column index of any cell in the merged region Returns: OperationResponse with unmerge result """ try: document = self.document_manager.get_or_load_document(file_path) validate_table_index(table_index, len(document.tables)) table = document.tables[table_index] validate_cell_position(row, column, len(table.rows), len(table.columns)) # Get the cell and check if it's part of a merged region cell = table.cell(row, column) merge_info = analyze_cell_merge(cell, row, column) if not merge_info or merge_info.merge_type == CellMergeType.NONE: return OperationResponse.error( f"Cell at row {row}, col {column} is not part of a merged region" ) # Store the content before unmerging original_content = cell.text # Get the actual merged cell (top-left corner) merged_cell = table.cell(merge_info.start_row, merge_info.start_col) # Store merge dimensions span_rows = merge_info.span_rows span_cols = merge_info.span_cols # Unmerge by splitting the merged cell # This is done by removing the gridSpan and vMerge attributes try: # Get the table element table_element = table._element # Find the merged cell element merged_cell_element = merged_cell._element # Remove gridSpan and vMerge attributes from all cells in the merged region for row_idx in range(merge_info.start_row, merge_info.end_row + 1): for col_idx in range(merge_info.start_col, merge_info.end_col + 1): current_cell = table.cell(row_idx, col_idx) current_cell_element = current_cell._element # Get tcPr element tc_pr = current_cell_element.find(qn('w:tcPr')) if tc_pr is not None: # Remove gridSpan grid_span = tc_pr.find(qn('w:gridSpan')) if grid_span is not None: tc_pr.remove(grid_span) # Remove vMerge v_merge = tc_pr.find(qn('w:vMerge')) if v_merge is not None: tc_pr.remove(v_merge) # If this is not the top-left cell, remove the cell content if row_idx != merge_info.start_row or col_idx != merge_info.start_col: # Clear the cell content current_cell.text = "" # Distribute content to individual cells (optional) # For now, we'll keep the content in the top-left cell only # Users can manually distribute content if needed except Exception as e: return OperationResponse.error(f"Failed to unmerge cells: {str(e)}") data = { "table_index": table_index, "original_merged_region": { "start_row": merge_info.start_row, "start_col": merge_info.start_col, "end_row": merge_info.end_row, "end_col": merge_info.end_col, "span_rows": span_rows, "span_cols": span_cols }, "unmerged_at": { "row": row, "column": column }, "original_content": original_content, "cells_unmerged": span_rows * span_cols } return OperationResponse.success( f"Successfully unmerged {span_rows}x{span_cols} cell region starting at row {merge_info.start_row}, col {merge_info.start_col}", data ) except (InvalidTableIndexError, InvalidCellPositionError) as e: return OperationResponse.error(str(e)) except Exception as e: return OperationResponse.error(f"Failed to unmerge cells: {str(e)}") def _apply_column_styling_after_add( self, table, original_cols, count, column_index, reference_column, default_text_format, default_alignment, default_background_color ): """ Apply styling to newly added column cells after they have been added. Args: table: The table object original_cols: Original number of columns count: Number of columns added position: Position where columns were added column_index: Column index for at_index position reference_column: List of cells to copy style from (if provided) default_text_format: Default text formatting default_alignment: Default alignment default_background_color: Default background color """ # Determine which columns were added based on the new index semantics if column_index is None: # Appended at end new_column_indices = list(range(original_cols, original_cols + count)) elif column_index == -1: # Inserted before first column -> new columns occupy indices 0..count-1 new_column_indices = list(range(0, count)) else: # Inserted after column_index -> new columns start at column_index + 1 new_column_indices = list(range(column_index + 1, column_index + 1 + count)) # Apply styling to each new column for col_idx in new_column_indices: for row_idx, row in enumerate(table.rows): if col_idx < len(row.cells): new_cell = row.cells[col_idx] # Determine reference cell for style copying reference_cell = None if reference_column and row_idx < len(reference_column): reference_cell = reference_column[row_idx] # Copy style from reference cell if available if reference_cell: self._copy_cell_style(new_cell, reference_cell) # Apply default formatting if no reference or to override if default_text_format or default_alignment or default_background_color: self._apply_default_cell_formatting( new_cell, default_text_format, default_alignment, default_background_color )

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Rookie0x80/docx-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server