DOCX-MCP

Overview Schema Related Servers Score Discussions

table_analysis.py•13.9 KiB

"""Data models for comprehensive table structure and style analysis."""

import logging
from dataclasses import dataclass
from typing import List, Optional, Dict, Any, Tuple
from enum import Enum

logger = logging.getLogger(__name__)

# WordprocessingML main namespace; every element/attribute read below lives in it.
_W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"

# Integer values of the paragraph alignment setting that this module names.
# Any other value is reported as None.
_ALIGNMENT_NAMES = {
    0: "left",
    1: "center",
    2: "right",
    3: "justify",
}

_BORDER_SIDES = ("top", "bottom", "left", "right")


def _w(tag: str) -> str:
    """Return *tag* qualified with the WordprocessingML namespace (Clark notation)."""
    return f"{{{_W_NS}}}{tag}"


class CellMergeType(Enum):
    """Types of cell merging."""

    NONE = "none"
    HORIZONTAL = "horizontal"  # Cell spans multiple columns
    VERTICAL = "vertical"  # Cell spans multiple rows
    BOTH = "both"  # Cell spans both rows and columns


@dataclass
class MergeInfo:
    """Information about cell merging."""

    merge_type: CellMergeType
    start_row: int
    end_row: int
    start_col: int
    end_col: int
    span_rows: int
    span_cols: int

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary representation (enum serialized by value)."""
        return {
            "type": self.merge_type.value,
            "start_row": self.start_row,
            "end_row": self.end_row,
            "start_col": self.start_col,
            "end_col": self.end_col,
            "span_rows": self.span_rows,
            "span_cols": self.span_cols,
        }


@dataclass
class CellStyleAnalysis:
    """Comprehensive analysis of a single cell's styling."""

    # Position information
    row_index: int
    column_index: int

    # Content
    text_content: str
    is_empty: bool

    # Merge information
    merge_info: Optional[MergeInfo]

    # Text formatting
    font_family: Optional[str]
    font_size: Optional[float]  # In points; extract_cell_formatting stores ``size.pt``
    font_color: Optional[str]
    is_bold: bool
    is_italic: bool
    is_underlined: bool
    is_strikethrough: bool

    # Alignment
    horizontal_alignment: Optional[str]  # left, center, right, justify
    vertical_alignment: Optional[str]  # top, middle, bottom

    # Background and borders
    background_color: Optional[str]

    # Border information for each side
    top_border: Optional[Dict[str, str]]  # style, width, color
    bottom_border: Optional[Dict[str, str]]
    left_border: Optional[Dict[str, str]]
    right_border: Optional[Dict[str, str]]

    # Cell dimensions (if available)
    width: Optional[float]  # In points or inches
    height: Optional[float]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary representation.

        The ``"merge"`` key is always present (``None`` for unmerged cells);
        the ``"dimensions"`` key is only added when width or height is known.
        """
        result: Dict[str, Any] = {
            "position": {
                "row": self.row_index,
                "column": self.column_index,
            },
            "content": {
                "text": self.text_content,
                "is_empty": self.is_empty,
            },
            "text_format": {
                "font_family": self.font_family,
                "font_size": self.font_size,
                "font_color": self.font_color,
                "bold": self.is_bold,
                "italic": self.is_italic,
                "underlined": self.is_underlined,
                "strikethrough": self.is_strikethrough,
            },
            "alignment": {
                "horizontal": self.horizontal_alignment,
                "vertical": self.vertical_alignment,
            },
            "background": {
                "color": self.background_color,
            },
            "borders": {
                "top": self.top_border,
                "bottom": self.bottom_border,
                "left": self.left_border,
                "right": self.right_border,
            },
        }

        result["merge"] = (
            self.merge_info.to_dict() if self.merge_info is not None else None
        )

        if self.width is not None or self.height is not None:
            result["dimensions"] = {
                "width": self.width,
                "height": self.height,
            }

        return result


@dataclass
class TableStructureAnalysis:
    """Comprehensive analysis of table structure and styling."""

    # Basic table information
    table_index: int
    total_rows: int
    total_columns: int

    # Table-level properties
    table_style_name: Optional[str]
    table_alignment: Optional[str]  # left, center, right
    table_width: Optional[float]

    # Header information
    has_header_row: bool
    header_row_index: Optional[int]
    header_cells: Optional[List[str]]

    # All cell analyses
    cells: List[List[CellStyleAnalysis]]  # [row][column]

    # Merge summary
    merged_cells_count: int
    merge_regions: List[MergeInfo]

    # Style consistency analysis
    consistent_fonts: bool
    consistent_alignment: bool
    consistent_borders: bool

    # Common styles found
    unique_font_families: List[str]
    unique_font_sizes: List[float]
    unique_colors: List[str]
    unique_background_colors: List[str]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "table_info": {
                "index": self.table_index,
                "rows": self.total_rows,
                "columns": self.total_columns,
                "style_name": self.table_style_name,
                "alignment": self.table_alignment,
                "width": self.table_width,
            },
            "header_info": {
                "has_header": self.has_header_row,
                "header_row_index": self.header_row_index,
                "header_cells": self.header_cells,
            },
            "cells": [[cell.to_dict() for cell in row] for row in self.cells],
            "merge_analysis": {
                "merged_cells_count": self.merged_cells_count,
                "merge_regions": [merge.to_dict() for merge in self.merge_regions],
            },
            "style_consistency": {
                "fonts": self.consistent_fonts,
                "alignment": self.consistent_alignment,
                "borders": self.consistent_borders,
            },
            "style_summary": {
                "font_families": self.unique_font_families,
                "font_sizes": self.unique_font_sizes,
                "colors": self.unique_colors,
                "background_colors": self.unique_background_colors,
            },
        }


@dataclass
class TableAnalysisResult:
    """Result of comprehensive table analysis."""

    file_path: str
    total_tables: int
    analysis_timestamp: str
    tables: List[TableStructureAnalysis]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "file_info": {
                "path": self.file_path,
                "total_tables": self.total_tables,
                "analysis_timestamp": self.analysis_timestamp,
            },
            "tables": [table.to_dict() for table in self.tables],
        }


# Helper functions for analysis


def analyze_cell_merge(cell, row_idx: int, col_idx: int) -> Optional[MergeInfo]:
    """Analyze if a cell is part of a merge and return merge information.

    Args:
        cell: Object exposing the cell's ``<w:tc>`` XML element as ``_element``.
        row_idx: Row index recorded as the start of the merge region.
        col_idx: Column index recorded as the start of the merge region.

    Returns:
        A ``MergeInfo`` when the cell carries ``gridSpan > 1`` and/or a
        ``vMerge`` marker, otherwise ``None``. ``None`` is also returned when
        the cell has no ``_element`` or its XML cannot be read (best effort).

    Note:
        ``span_rows`` is always 1. The vertical extent of a merge cannot be
        derived from a single cell; it requires scanning the rows below for
        continuation cells, which this helper does not have access to.
    """
    tc_element = getattr(cell, "_element", None)
    if tc_element is None:
        return None

    try:
        # Only the cell's own property element is relevant; a descendant search
        # could match properties belonging to a table nested in this cell.
        tc_pr = tc_element.find(_w("tcPr"))
        if tc_pr is None:
            return None

        # gridSpan is a child element of tcPr whose w:val holds the column count.
        span_cols = 1
        grid_span_elem = tc_pr.find(_w("gridSpan"))
        if grid_span_elem is not None:
            span_cols = int(grid_span_elem.get(_w("val")) or 1)

        # A vMerge element without w:val denotes a continuation cell.
        v_merge = None
        v_merge_elem = tc_pr.find(_w("vMerge"))
        if v_merge_elem is not None:
            v_merge = v_merge_elem.get(_w("val")) or "continue"
    except Exception:
        # Best effort: malformed XML or an unexpected element type means
        # "no merge information available", not a failed analysis.
        logger.debug(
            "Could not read merge properties for cell (%s, %s)",
            row_idx,
            col_idx,
            exc_info=True,
        )
        return None

    if span_cols <= 1 and v_merge is None:
        return None

    if span_cols > 1 and v_merge is not None:
        merge_type = CellMergeType.BOTH
    elif span_cols > 1:
        merge_type = CellMergeType.HORIZONTAL
    else:
        merge_type = CellMergeType.VERTICAL

    span_rows = 1  # See Note in the docstring.
    return MergeInfo(
        merge_type=merge_type,
        start_row=row_idx,
        end_row=row_idx + span_rows - 1,
        start_col=col_idx,
        end_col=col_idx + span_cols - 1,
        span_rows=span_rows,
        span_cols=span_cols,
    )


def _apply_text_formatting(cell, formatting: Dict[str, Any]) -> None:
    """Fill alignment and font fields from the cell's first paragraph and first run."""
    if not cell.paragraphs:
        return
    paragraph = cell.paragraphs[0]

    if paragraph.alignment is not None:
        formatting["horizontal_alignment"] = _ALIGNMENT_NAMES.get(paragraph.alignment)

    if not paragraph.runs:
        return
    run = paragraph.runs[0]
    font = run.font

    if font.name:
        formatting["font_family"] = font.name
    if font.size:
        formatting["font_size"] = font.size.pt
    color = font.color
    if color is not None and color.rgb is not None:
        formatting["font_color"] = str(color.rgb)

    # These properties may be None (inherited) or, for underline, a non-bool
    # style value; collapse everything to a plain bool.
    formatting["is_bold"] = bool(run.bold)
    formatting["is_italic"] = bool(run.italic)
    formatting["is_underlined"] = bool(run.underline)
    formatting["is_strikethrough"] = bool(font.strike)


def _apply_cell_properties(cell, formatting: Dict[str, Any]) -> None:
    """Fill shading, vertical alignment and border fields from the cell's ``tcPr``."""
    tc_element = getattr(cell, "_element", None)
    if tc_element is None:
        return
    # Direct-child lookups only, so properties of nested tables or of revision
    # records stored under tcPr are never mistaken for the cell's own.
    tc_pr = tc_element.find(_w("tcPr"))
    if tc_pr is None:
        return

    shd = tc_pr.find(_w("shd"))
    if shd is not None:
        fill = shd.get(_w("fill"))
        if fill:
            formatting["background_color"] = fill

    v_align = tc_pr.find(_w("vAlign"))
    if v_align is not None:
        formatting["vertical_alignment"] = v_align.get(_w("val"))

    tc_borders = tc_pr.find(_w("tcBorders"))
    if tc_borders is None:
        return
    for side in _BORDER_SIDES:
        border_elem = tc_borders.find(_w(side))
        if border_elem is not None:
            formatting["borders"][side] = {
                "style": border_elem.get(_w("val")),
                "width": border_elem.get(_w("sz")),
                "color": border_elem.get(_w("color")),
            }


def extract_cell_formatting(cell) -> Dict[str, Any]:
    """Extract comprehensive formatting information from a cell.

    Text properties are taken from the first run of the first paragraph only;
    cell-level properties (shading, vertical alignment, borders) are read from
    the cell's ``tcPr`` element.

    Args:
        cell: Object exposing ``paragraphs`` and the ``<w:tc>`` XML element as
            ``_element``.

    Returns:
        A dict with the keys ``font_family``, ``font_size``, ``font_color``,
        ``is_bold``, ``is_italic``, ``is_underlined``, ``is_strikethrough``,
        ``horizontal_alignment``, ``vertical_alignment``, ``background_color``
        and ``borders`` (itself a dict keyed by ``top``/``bottom``/``left``/
        ``right``). Fields that could not be determined keep their defaults
        (``None`` or ``False``). This function never raises.
    """
    formatting: Dict[str, Any] = {
        "font_family": None,
        "font_size": None,
        "font_color": None,
        "is_bold": False,
        "is_italic": False,
        "is_underlined": False,
        "is_strikethrough": False,
        "horizontal_alignment": None,
        "vertical_alignment": None,
        "background_color": None,
        "borders": {
            "top": None,
            "bottom": None,
            "left": None,
            "right": None,
        },
    }

    # The two stages are isolated so that a failure while reading run
    # formatting does not discard shading/border information, and vice versa.
    for stage in (_apply_text_formatting, _apply_cell_properties):
        try:
            stage(cell, formatting)
        except Exception:
            # Best effort: keep whatever was extracted before the error.
            logger.debug("Cell formatting stage %s failed", stage.__name__, exc_info=True)

    return formatting

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Rookie0x80/docx-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

table_analysis.py•13.9 KiB