file_manager.py•7.42 kB
"""
Excel文件管理模块
处理Excel文件的读取、写入、备份等操作
"""
import shutil
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, Any, List
import openpyxl
import pandas as pd
from openpyxl import Workbook, load_workbook
from openpyxl.utils.exceptions import InvalidFileException
from .config import ConfigManager
from .security import SecurityValidator
class FileManager:
"""Excel文件管理器"""
def __init__(self, config_manager: ConfigManager, security_validator: SecurityValidator):
self.config_manager = config_manager
self.security_validator = security_validator
self.temp_dir = config_manager.get_temp_dir()
def validate_file(self, file_path: str) -> bool:
"""验证文件是否有效"""
# 安全验证
if not self.security_validator.validate_file_path(file_path):
return False
# 大小验证
if not self.security_validator.check_file_size(file_path):
logging.error(f"文件大小超出限制: {file_path}")
return False
return True
def load_workbook(self, file_path: str, read_only: bool = False) -> Optional[openpyxl.Workbook]:
"""加载Excel工作簿"""
try:
if not self.validate_file(file_path):
return None
path = Path(file_path)
if not path.exists():
logging.error(f"文件不存在: {file_path}")
return None
# 尝试加载工作簿
workbook = load_workbook(filename=file_path, read_only=read_only, data_only=False)
logging.info(f"成功加载工作簿: {file_path}")
return workbook
except InvalidFileException as e:
logging.error(f"无效的Excel文件: {file_path}, 错误: {e}")
return None
except Exception as e:
logging.error(f"加载工作簿失败: {file_path}, 错误: {e}")
return None
def save_workbook(self, workbook: openpyxl.Workbook, file_path: str) -> bool:
"""保存Excel工作簿"""
try:
if not self.validate_file(file_path):
return False
# 创建备份
if self.config_manager.is_backup_enabled() and Path(file_path).exists():
self.create_backup(file_path)
# 保存工作簿
workbook.save(filename=file_path)
logging.info(f"成功保存工作簿: {file_path}")
return True
except Exception as e:
logging.error(f"保存工作簿失败: {file_path}, 错误: {e}")
return False
def create_backup(self, file_path: str) -> Optional[str]:
"""创建文件备份"""
try:
path = Path(file_path)
if not path.exists():
return None
# 生成备份文件名
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
backup_name = f"{path.stem}_backup_{timestamp}{path.suffix}"
backup_path = self.temp_dir / backup_name
# 复制文件
shutil.copy2(file_path, backup_path)
logging.info(f"创建备份: {backup_path}")
return str(backup_path)
except Exception as e:
logging.error(f"创建备份失败: {file_path}, 错误: {e}")
return None
def create_workbook(self, file_path: str, sheet_names: List[str] = None) -> bool:
"""创建新的Excel工作簿"""
try:
if not self.validate_file(file_path):
return False
# 创建新工作簿
workbook = Workbook()
# 移除默认工作表
default_sheet = workbook.active
workbook.remove(default_sheet)
# 添加指定的工作表
if sheet_names:
for sheet_name in sheet_names:
workbook.create_sheet(title=sheet_name)
else:
workbook.create_sheet(title="Sheet1")
# 保存工作簿
return self.save_workbook(workbook, file_path)
except Exception as e:
logging.error(f"创建工作簿失败: {file_path}, 错误: {e}")
return False
def get_file_info(self, file_path: str) -> Dict[str, Any]:
"""获取Excel文件信息"""
try:
if not self.validate_file(file_path):
return {}
workbook = self.load_workbook(file_path, read_only=True)
if not workbook:
return {}
info = {
"file_path": file_path,
"sheet_names": workbook.sheetnames,
"sheet_count": len(workbook.sheetnames),
"file_size": Path(file_path).stat().st_size if Path(file_path).exists() else 0
}
# 获取每个工作表的基本信息
sheets_info = {}
for sheet_name in workbook.sheetnames:
sheet = workbook[sheet_name]
sheets_info[sheet_name] = {
"max_row": sheet.max_row,
"max_column": sheet.max_column,
"title": sheet.title
}
info["sheets_info"] = sheets_info
workbook.close()
return info
except Exception as e:
logging.error(f"获取文件信息失败: {file_path}, 错误: {e}")
return {}
def read_as_dataframe(self, file_path: str, sheet_name: str = None, **kwargs) -> Optional[pd.DataFrame]:
"""将Excel文件读取为pandas DataFrame"""
try:
if not self.validate_file(file_path):
return None
# 使用pandas读取Excel文件
df = pd.read_excel(file_path, sheet_name=sheet_name, **kwargs)
logging.info(f"成功读取为DataFrame: {file_path}, 工作表: {sheet_name}")
return df
except Exception as e:
logging.error(f"读取DataFrame失败: {file_path}, 错误: {e}")
return None
def write_dataframe(self, df: pd.DataFrame, file_path: str, sheet_name: str = "Sheet1", **kwargs) -> bool:
"""将pandas DataFrame写入Excel文件"""
try:
if not self.validate_file(file_path):
return False
# 创建备份
if self.config_manager.is_backup_enabled() and Path(file_path).exists():
self.create_backup(file_path)
# 写入DataFrame
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a' if Path(file_path).exists() else 'w') as writer:
df.to_excel(writer, sheet_name=sheet_name, index=False, **kwargs)
logging.info(f"成功写入DataFrame: {file_path}, 工作表: {sheet_name}")
return True
except Exception as e:
logging.error(f"写入DataFrame失败: {file_path}, 错误: {e}")
return False