MCP Sheet Parser

MIT License

Overview InspectNew Endpoints Schema Related Servers Reviews Score

table_structure_converter.py•13.5 kB

from typing import Any

from src.models.table_model import Sheet, Row
from src.utils.range_parser import parse_range_string
from src.utils.html_utils import escape_html, create_html_element, create_table_cell


class TableStructureConverter:
    """Generate the HTML table structure (caption, thead, tbody, rows, cells) for a sheet.

    Cell content is rendered through ``cell_converter.convert`` and style
    keys are obtained from ``style_converter.get_style_key``; this class only
    assembles the surrounding table markup.
    """

    # Text whose display width is at or below this value is never marked as overflowing.
    TEXT_OVERFLOW_THRESHOLD = 8

    # Inline style attached to overflowing cells (inline so it wins over class rules).
    _OVERFLOW_INLINE_STYLE = (
        ' style="overflow: visible !important; white-space: nowrap !important; '
        'width: auto !important; min-width: auto !important; '
        'word-wrap: normal !important; position: relative; z-index: 5;"'
    )

    # Background values that do not make an otherwise empty cell "meaningful".
    _DEFAULT_BACKGROUNDS = ('ffffff', 'white', 'none', 'auto')

    # Style attributes inspected when checking whether a cell carries a border.
    _BORDER_ATTRS = ('border_top', 'border_bottom', 'border_left', 'border_right')

    def __init__(self, cell_converter, style_converter):
        """Store the collaborators used for cell content and style-key lookup."""
        self.cell_converter = cell_converter
        self.style_converter = style_converter

    def generate_table(self, sheet: Sheet, styles: dict[str, Any], header_rows: int) -> str:
        """Return the complete ``<table>`` markup for ``sheet``.

        Args:
            sheet: Sheet providing ``name``, ``rows`` and ``merged_cells``.
            styles: Mapping of style id to style object; inverted here so a
                cell's style key can be mapped back to its id.
            header_rows: Number of leading rows emitted inside ``<thead>``.
                Values <= 0 put every row in ``<tbody>``.

        Returns:
            The table parts joined with newlines. A ``<tbody>`` element is
            always emitted, even when there are no body rows.
        """
        occupied_cells: set[tuple[int, int]] = set()
        merged_cells_map: dict[tuple[int, int], dict[str, int]] = {}

        for merged_range in sheet.merged_cells:
            # Only parsing/unpacking is expected to raise ValueError, so keep
            # the try body limited to that statement.
            try:
                start_row, start_col, end_row, end_col = parse_range_string(merged_range)
            except ValueError as e:
                print(f"Could not parse merged cell range '{merged_range}': {e}")
                continue

            row_span = end_row - start_row + 1
            col_span = end_col - start_col + 1
            if row_span <= 1 and col_span <= 1:
                continue

            # NOTE(review): coordinates are compared against enumerate()
            # positions of sheet.rows / row.cells below, so parse_range_string
            # is assumed to use the same index base - confirm.
            anchor = (start_row, start_col)
            merged_cells_map[anchor] = {"rowspan": row_span, "colspan": col_span}
            # Every covered position except the anchor is skipped when rendering.
            occupied_cells.update(
                (r, c)
                for r in range(start_row, end_row + 1)
                for c in range(start_col, end_col + 1)
                if (r, c) != anchor
            )

        # Build the opening tag only: create_html_element returns a complete
        # element, so the closing tag is stripped and appended at the end.
        table_attrs = {
            'role': 'table',
            'aria-label': f'Table: {sheet.name}'
        }
        opening_tag = create_html_element('table', '', attributes=table_attrs).replace('></table>', '>')
        table_parts = [opening_tag]

        if sheet.name and sheet.name.strip():
            table_parts.append(create_html_element('caption', f'Table: {escape_html(sheet.name)}'))

        style_key_to_id_map = {
            self.style_converter.get_style_key(style_obj): style_id
            for style_id, style_obj in styles.items()
        }

        if header_rows > 0 and len(sheet.rows) > 0:
            table_parts.append('<thead>')
            self._generate_rows_html(table_parts, sheet.rows[:header_rows], occupied_cells,
                                     merged_cells_map, style_key_to_id_map, is_header=True)
            table_parts.append('</thead>')
            body_rows = sheet.rows[header_rows:]
            body_offset = header_rows
        else:
            body_rows = sheet.rows
            body_offset = 0

        # The tbody wrapper is emitted even when there are no body rows.
        table_parts.append('<tbody>')
        if body_rows:
            self._generate_rows_html(table_parts, body_rows, occupied_cells, merged_cells_map,
                                     style_key_to_id_map, is_header=False, row_offset=body_offset)
        table_parts.append('</tbody>')

        table_parts.append('</table>')
        return "\n".join(table_parts)

    @staticmethod
    def _build_span_attrs(spans: dict[str, int] | None) -> str:
        """Return ``' rowspan="N"'`` / ``' colspan="N"'`` fragments for spans greater than 1."""
        if spans is None:
            return ""
        fragments = []
        if spans["rowspan"] > 1:
            fragments.append(f' rowspan="{spans["rowspan"]}"')
        if spans["colspan"] > 1:
            fragments.append(f' colspan="{spans["colspan"]}"')
        return "".join(fragments)

    @staticmethod
    def _quoted_attr_value(markup: str, name: str, leading_space: bool = True) -> str | None:
        """Return the value of ``name="..."`` inside ``markup``, or None when absent.

        With ``leading_space`` the attribute must be preceded by a space to
        count as present.
        """
        marker = f'{name}="'
        guard = f' {marker}' if leading_space else marker
        if not markup or guard not in markup:
            return None
        return markup.split(marker)[1].split('"')[0]

    def _generate_row_html(self, row: Row, styles: dict[str, Any], is_header: bool = False,
                           occupied_cells: set | None = None, merged_cells_map: dict | None = None,
                           row_idx: int = 0) -> str:
        """Return the HTML for a single row as one string.

        No CSS classes are attached here (``styles`` is accepted but not
        read); only merged-cell spans are applied.

        Args:
            row: Row whose ``cells`` are rendered.
            styles: Unused; kept for interface compatibility.
            is_header: Forwarded to the cell generator.
            occupied_cells: Positions covered by a merge (skipped). Defaults to empty.
            merged_cells_map: Merge anchors mapped to their spans. Defaults to empty.
            row_idx: Row index used for the merge lookups.
        """
        if occupied_cells is None:
            occupied_cells = set()
        if merged_cells_map is None:
            merged_cells_map = {}

        row_parts = ['<tr>']
        for c_idx, cell in enumerate(row.cells):
            position = (row_idx, c_idx)
            if position in occupied_cells:
                continue
            span_attrs = self._build_span_attrs(merged_cells_map.get(position))
            row_parts.append(self._generate_cell_html(cell, "", span_attrs, is_header))
        row_parts.append('</tr>')
        return ''.join(row_parts)

    def _generate_rows_html(self, table_parts: list, rows: list, occupied_cells: set,
                            merged_cells_map: dict, style_key_to_id_map: dict,
                            is_header: bool = False, row_offset: int = 0) -> None:
        """Append ``<tr>`` markup for ``rows`` to ``table_parts`` in place.

        Args:
            table_parts: Output list that receives the generated fragments.
            rows: Rows to render.
            occupied_cells: Positions covered by a merge (skipped).
            merged_cells_map: Merge anchors mapped to their spans.
            style_key_to_id_map: Style key mapped to the CSS class id to emit.
            is_header: Forwarded to the cell generator.
            row_offset: Added to each enumerate() index to obtain the row
                index used for the merge lookups.
        """
        for r_idx, row in enumerate(rows):
            actual_row_idx = r_idx + row_offset
            table_parts.append('<tr>')

            # Trailing cells after the last meaningful column are dropped.
            last_content_col = self._find_last_content_column(
                row, actual_row_idx, occupied_cells, merged_cells_map)

            for c_idx, cell in enumerate(row.cells):
                position = (actual_row_idx, c_idx)
                if position in occupied_cells:
                    continue
                if c_idx > last_content_col and not self._has_meaningful_content(cell):
                    continue

                css_classes = []
                if cell.style:
                    style_key = self.style_converter.get_style_key(cell.style)
                    style_id = style_key_to_id_map.get(style_key)
                    if style_id:
                        css_classes.append(style_id)
                    if cell.style.wrap_text:
                        css_classes.append("wrap-text")

                # Spreadsheet-like behaviour: long text may spill into an empty neighbour.
                overflow_style = ""
                if self._should_overflow_text(cell, row, c_idx):
                    css_classes.append("text-overflow")
                    overflow_style = self._OVERFLOW_INLINE_STYLE

                style_class = ' class="' + " ".join(css_classes) + '"' if css_classes else ""
                span_attrs = self._build_span_attrs(merged_cells_map.get(position))

                table_parts.append(
                    self._generate_cell_html(cell, style_class, span_attrs, is_header, overflow_style))

            table_parts.append('</tr>')

    def _generate_cell_html(self, cell, style_class, span_attrs, is_header, overflow_style="",
                            colspan=None, rowspan=None):
        """Return the markup for one cell via ``create_table_cell``.

        Args:
            cell: Cell to render; ``value``-independent here, but ``style``
                and ``formula`` are read.
            style_class: Either ``""`` or a ``' class="a b"'`` fragment.
            span_attrs: Either ``""`` or ``' rowspan="N"'`` / ``' colspan="N"'``
                fragments; values found here take precedence over
                ``rowspan`` / ``colspan``.
            is_header: Forwarded to ``create_table_cell``.
            overflow_style: Either ``""`` or a ``' style="..."'`` fragment,
                split into individual declarations.
            colspan: Fallback column span (1 when None).
            rowspan: Fallback row span (1 when None).
        """
        cell_content = self.cell_converter.convert(cell)

        # Wrap the content in a link when the style carries a hyperlink.
        # NOTE(review): the URL scheme is not validated here - confirm that
        # untrusted links (e.g. javascript:) are rejected upstream.
        if cell.style and cell.style.hyperlink:
            href = escape_html(cell.style.hyperlink)
            cell_content = create_html_element('a', cell_content, attributes={'href': href})

        # Tooltip text: comment first, then the formula.
        title_parts = []
        if cell.style and cell.style.comment:
            title_parts.append(escape_html(cell.style.comment))
        if cell.formula:
            title_parts.append(f"Formula: {escape_html(cell.formula)}")

        cell_attrs = {}
        if title_parts:
            cell_attrs['title'] = " | ".join(title_parts)
        if cell.style and cell.style.number_format:
            # NOTE(review): computed but not forwarded - the create_table_cell
            # call below only receives the title. Confirm whether the helper
            # can accept extra attributes before wiring this through.
            cell_attrs['data-number-format'] = escape_html(cell.style.number_format)

        # Explicit keyword spans are the fallback; span_attrs wins when present.
        final_rowspan = rowspan if rowspan is not None else 1
        final_colspan = colspan if colspan is not None else 1
        rowspan_text = self._quoted_attr_value(span_attrs, 'rowspan')
        if rowspan_text is not None:
            final_rowspan = int(rowspan_text)
        colspan_text = self._quoted_attr_value(span_attrs, 'colspan')
        if colspan_text is not None:
            final_colspan = int(colspan_text)

        class_text = self._quoted_attr_value(style_class, 'class')
        css_classes = class_text.split() if class_text is not None else []

        # Break the inline style fragment into property/value pairs.
        inline_styles = {}
        style_content = self._quoted_attr_value(overflow_style, 'style', leading_space=False)
        if style_content is not None:
            for declaration in style_content.split(';'):
                if ':' in declaration:
                    key, value = declaration.split(':', 1)
                    inline_styles[key.strip()] = value.strip()

        return create_table_cell(
            content=cell_content,
            is_header=is_header,
            rowspan=final_rowspan,
            colspan=final_colspan,
            css_classes=css_classes,
            inline_styles=inline_styles,
            title=cell_attrs.get('title', '')
        )

    def _should_overflow_text(self, cell, row, col_idx):
        """Return True when the cell's text should visually overflow to the right.

        All of the following must hold:
          1. the cell has a truthy value;
          2. its display width exceeds ``TEXT_OVERFLOW_THRESHOLD``
             (non-ASCII characters count as 2, ASCII as 1);
          3. text wrapping is not enabled on the cell's style;
          4. the cell to the right is missing or has no value.
        """
        if not cell.value:
            return False

        cell_text = str(cell.value).strip()
        display_width = sum(2 if ord(ch) > 127 else 1 for ch in cell_text)
        if display_width <= self.TEXT_OVERFLOW_THRESHOLD:
            return False

        # Wrapped text stays inside its own cell.
        if cell.style and cell.style.wrap_text:
            return False

        next_col_idx = col_idx + 1
        if next_col_idx < len(row.cells):
            neighbour_value = row.cells[next_col_idx].value
            # Compare explicitly instead of relying on truthiness: a neighbour
            # holding 0 or False is real content and must block the overflow.
            if neighbour_value is not None and neighbour_value != "":
                return False

        return True

    def _find_last_content_column(self, row, actual_row_idx: int, occupied_cells: set,
                                  merged_cells_map: dict) -> int:
        """Return the index of the last column in ``row`` that has content.

        A column counts when its cell is meaningful (see
        ``_has_meaningful_content``) or is the anchor of a merged range.
        Positions in ``occupied_cells`` are ignored.

        Returns:
            The last such column index, or -1 when the row has none.
        """
        last_content_col = -1
        for c_idx, cell in enumerate(row.cells):
            position = (actual_row_idx, c_idx)
            if position in occupied_cells:
                continue
            if self._has_meaningful_content(cell) or position in merged_cells_map:
                last_content_col = c_idx
        return last_content_col

    def _has_meaningful_content(self, cell) -> bool:
        """Return True when the cell has a value, a formula, or visible styling.

        Visible styling means a background colour outside the default set
        (``ffffff``, ``white``, ``none``, ``auto``; case-insensitive) or any
        truthy top/bottom/left/right border attribute.
        """
        if cell.value is not None and str(cell.value).strip():
            return True

        if cell.formula:
            return True

        style = cell.style
        if not style:
            return False

        # getattr with a default tolerates style objects lacking these attributes.
        background = getattr(style, 'background_color', None)
        if background and background.lower() not in self._DEFAULT_BACKGROUNDS:
            return True

        return any(getattr(style, attr, None) for attr in self._BORDER_ATTRS)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yuqie6/MCP-Sheet-Parser-cot'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.