Skip to main content
Glama
excel_helper.py6.16 kB
"""Excel文件智能分析和参数推荐辅助函数""" import pandas as pd import openpyxl from typing import Dict, Any, List, Optional def _suggest_excel_read_parameters(file_path: str, sheet_name: str = None) -> Dict[str, Any]: """ 分析Excel文件结构并建议最佳的读取参数 使用增强的多级列头检测器 Args: file_path: Excel文件路径 sheet_name: 工作表名称 Returns: 包含建议参数的字典 """ try: # 导入增强检测器 from .enhanced_multiheader_detector import enhanced_suggest_excel_read_parameters # 使用增强检测器 result = enhanced_suggest_excel_read_parameters(file_path, sheet_name) # 如果增强检测器成功,直接返回结果 if "error" not in result.get("analysis", {}): return result # 如果增强检测器失败,回退到简化版本 return _fallback_suggest_parameters(file_path, sheet_name) except ImportError: # 如果无法导入增强检测器,使用回退方案 return _fallback_suggest_parameters(file_path, sheet_name) except Exception as e: # 其他错误也使用回退方案 return _fallback_suggest_parameters(file_path, sheet_name) def _fallback_suggest_parameters(file_path: str, sheet_name: str = None) -> Dict[str, Any]: """ 回退的参数建议方案(简化版本) Args: file_path: Excel文件路径 sheet_name: 工作表名称 Returns: 包含建议参数的字典 """ try: # 读取前10行数据进行分析 sample_df = pd.read_excel(file_path, sheet_name=sheet_name, header=None, nrows=10) suggestions = { "recommended_params": {}, "analysis": { "empty_rows": [], "potential_headers": [], "multi_level_header_detected": False, "fallback_mode": True }, "warnings": ["使用简化检测模式"], "tips": [] } # 检测空行 empty_rows = [] for i, row in sample_df.iterrows(): if row.isna().all(): empty_rows.append(i) suggestions["analysis"]["empty_rows"] = empty_rows # 检测潜在的标题行 potential_headers = [] for i, row in sample_df.iterrows(): if not row.isna().all(): # 检查是否主要包含文本(可能是标题) text_count = sum(1 for val in row if pd.notna(val) and isinstance(val, str)) total_count = sum(1 for val in row if pd.notna(val)) if total_count > 0 and text_count / total_count > 0.5: potential_headers.append(i) suggestions["analysis"]["potential_headers"] = potential_headers # 处理skiprows if empty_rows and empty_rows[0] == 0: consecutive_empty = 0 for i in empty_rows: if i == consecutive_empty: consecutive_empty += 1 else: break if consecutive_empty > 0: suggestions["recommended_params"]["skiprows"] = consecutive_empty suggestions["tips"].append(f"检测到前{consecutive_empty}行为空行,建议跳过") # 简化的单级列头处理 if potential_headers: header_row = potential_headers[-1] skiprows = suggestions["recommended_params"].get("skiprows", 0) adjusted_header = max(0, header_row - skiprows) suggestions["recommended_params"]["header"] = adjusted_header suggestions["tips"].append(f"使用第{header_row + 1}行作为列头") else: suggestions["recommended_params"]["header"] = 0 suggestions["tips"].append("使用第1行作为列头") return suggestions except Exception as e: # 如果分析失败,返回默认建议 return { "recommended_params": {"header": 0}, "analysis": { "multi_level_header_detected": False, "error": str(e), "fallback_mode": True }, "warnings": [f"文件分析失败,使用默认参数: {str(e)}"], "tips": ["建议手动检查文件格式"] } def detect_excel_structure(file_path: str, sheet_name: str = None) -> Dict[str, Any]: """检测Excel文件的详细结构信息 Args: file_path: Excel文件路径 sheet_name: 工作表名称 Returns: dict: 详细的结构信息 """ structure_info = { "sheets": [], "merged_cells": [], "data_range": {}, "formatting_info": {} } try: workbook = openpyxl.load_workbook(file_path) # 获取所有工作表信息 for sheet in workbook.worksheets: sheet_info = { "name": sheet.title, "max_row": sheet.max_row, "max_column": sheet.max_column, "merged_cells": [str(merged_range) for merged_range in sheet.merged_cells.ranges] } structure_info["sheets"].append(sheet_info) # 如果指定了工作表,获取详细信息 if sheet_name and sheet_name in [s.title for s in workbook.worksheets]: target_sheet = workbook[sheet_name] structure_info["merged_cells"] = [str(merged_range) for merged_range in target_sheet.merged_cells.ranges] structure_info["data_range"] = { "min_row": target_sheet.min_row, "max_row": target_sheet.max_row, "min_column": target_sheet.min_column, "max_column": target_sheet.max_column } workbook.close() except Exception as e: structure_info["error"] = str(e) return structure_info

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Lillard01/chatExcel-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server