chatExcel

excel_helper.py•6.01 KiB

"""Excel文件智能分析和参数推荐辅助函数""" import pandas as pd import openpyxl from typing import Dict, Any, List, Optional def _suggest_excel_read_parameters(file_path: str, sheet_name: str = None) -> Dict[str, Any]: """ 分析Excel文件结构并建议最佳的读取参数使用增强的多级列头检测器 Args: file_path: Excel文件路径 sheet_name: 工作表名称 Returns: 包含建议参数的字典 """ try: # 导入增强检测器 from .enhanced_multiheader_detector import enhanced_suggest_excel_read_parameters # 使用增强检测器 result = enhanced_suggest_excel_read_parameters(file_path, sheet_name) # 如果增强检测器成功，直接返回结果 if "error" not in result.get("analysis", {}): return result # 如果增强检测器失败，回退到简化版本 return _fallback_suggest_parameters(file_path, sheet_name) except ImportError: # 如果无法导入增强检测器，使用回退方案 return _fallback_suggest_parameters(file_path, sheet_name) except Exception as e: # 其他错误也使用回退方案 return _fallback_suggest_parameters(file_path, sheet_name) def _fallback_suggest_parameters(file_path: str, sheet_name: str = None) -> Dict[str, Any]: """ 回退的参数建议方案（简化版本） Args: file_path: Excel文件路径 sheet_name: 工作表名称 Returns: 包含建议参数的字典 """ try: # 读取前10行数据进行分析 sample_df = pd.read_excel(file_path, sheet_name=sheet_name, header=None, nrows=10) suggestions = { "recommended_params": {}, "analysis": { "empty_rows": [], "potential_headers": [], "multi_level_header_detected": False, "fallback_mode": True }, "warnings": ["使用简化检测模式"], "tips": [] } # 检测空行 empty_rows = [] for i, row in sample_df.iterrows(): if row.isna().all(): empty_rows.append(i) suggestions["analysis"]["empty_rows"] = empty_rows # 检测潜在的标题行 potential_headers = [] for i, row in sample_df.iterrows(): if not row.isna().all(): # 检查是否主要包含文本（可能是标题） text_count = sum(1 for val in row if pd.notna(val) and isinstance(val, str)) total_count = sum(1 for val in row if pd.notna(val)) if total_count > 0 and text_count / total_count > 0.5: potential_headers.append(i) suggestions["analysis"]["potential_headers"] = potential_headers # 处理skiprows if empty_rows and empty_rows[0] == 0: consecutive_empty = 0 for i in empty_rows: if i == consecutive_empty: consecutive_empty += 1 else: break if consecutive_empty > 0: suggestions["recommended_params"]["skiprows"] = consecutive_empty suggestions["tips"].append(f"检测到前{consecutive_empty}行为空行，建议跳过") # 简化的单级列头处理 if potential_headers: header_row = potential_headers[-1] skiprows = suggestions["recommended_params"].get("skiprows", 0) adjusted_header = max(0, header_row - skiprows) suggestions["recommended_params"]["header"] = adjusted_header suggestions["tips"].append(f"使用第{header_row + 1}行作为列头") else: suggestions["recommended_params"]["header"] = 0 suggestions["tips"].append("使用第1行作为列头") return suggestions except Exception as e: # 如果分析失败，返回默认建议 return { "recommended_params": {"header": 0}, "analysis": { "multi_level_header_detected": False, "error": str(e), "fallback_mode": True }, "warnings": [f"文件分析失败，使用默认参数: {str(e)}"], "tips": ["建议手动检查文件格式"] } def detect_excel_structure(file_path: str, sheet_name: str = None) -> Dict[str, Any]: """检测Excel文件的详细结构信息 Args: file_path: Excel文件路径 sheet_name: 工作表名称 Returns: dict: 详细的结构信息 """ structure_info = { "sheets": [], "merged_cells": [], "data_range": {}, "formatting_info": {} } try: workbook = openpyxl.load_workbook(file_path) # 获取所有工作表信息 for sheet in workbook.worksheets: sheet_info = { "name": sheet.title, "max_row": sheet.max_row, "max_column": sheet.max_column, "merged_cells": [str(merged_range) for merged_range in sheet.merged_cells.ranges] } structure_info["sheets"].append(sheet_info) # 如果指定了工作表，获取详细信息 if sheet_name and sheet_name in [s.title for s in workbook.worksheets]: target_sheet = workbook[sheet_name] structure_info["merged_cells"] = [str(merged_range) for merged_range in target_sheet.merged_cells.ranges] structure_info["data_range"] = { "min_row": target_sheet.min_row, "max_row": target_sheet.max_row, "min_column": target_sheet.min_column, "max_column": target_sheet.max_column } workbook.close() except Exception as e: structure_info["error"] = str(e) return structure_info

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Lillard01/chatExcel-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

excel_helper.py•6.01 KiB