Skip to main content
Glama
structure_optimizer.py23.3 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Project structure optimization tool.

Cleans up redundant files, optimizes the directory layout and improves
the overall organization of a project tree.
"""

import json
import shutil
import hashlib
from pathlib import Path
from typing import Dict, List, Set, Tuple
from datetime import datetime
import re


class StructureOptimizer:
    """Analyzes a project tree and performs/structures cleanup actions."""

    def __init__(self, project_root: str):
        """Initialize the optimizer.

        Args:
            project_root: Path to the project root directory.
        """
        self.project_root = Path(project_root)
        # Backups created by optimize_structure() land under the project itself.
        self.backup_dir = self.project_root / "backups" / "structure_optimization"

    def analyze_structure(self) -> Dict:
        """Run every analysis pass over the project tree.

        Returns:
            Dict with a timestamp plus one entry per analysis category
            (duplicates, empty dirs, large files, temp files, redundant
            configs, unused assets, structure suggestions).
        """
        print("📊 开始项目结构分析...")
        analysis = {
            "timestamp": datetime.now().isoformat(),
            "duplicate_files": self._find_duplicate_files(),
            "empty_directories": self._find_empty_directories(),
            "large_files": self._find_large_files(),
            "temporary_files": self._find_temporary_files(),
            "redundant_configs": self._find_redundant_configs(),
            "unused_assets": self._find_unused_assets(),
            "structure_suggestions": self._analyze_structure_patterns()
        }
        return analysis

    def _find_duplicate_files(self) -> List[Dict]:
        """Find files whose content is byte-identical.

        Returns:
            One record per duplicate file, referencing the first file seen
            with the same content hash as 'original'.
        """
        print("🔍 查找重复文件...")
        file_hashes = {}
        duplicates = []
        # Directories that never hold user content worth deduplicating.
        exclude_dirs = {'.git', '__pycache__', 'node_modules', '.venv', 'venv', 'cache', 'logs'}
        for file_path in self.project_root.rglob('*'):
            if file_path.is_file() and not any(part in exclude_dirs for part in file_path.parts):
                try:
                    # Fix: hash in fixed-size chunks instead of f.read() so
                    # large files do not get loaded into memory whole.
                    hasher = hashlib.md5()
                    with open(file_path, 'rb') as f:
                        for chunk in iter(lambda: f.read(65536), b''):
                            hasher.update(chunk)
                    file_hash = hasher.hexdigest()
                    if file_hash in file_hashes:
                        # Same content already seen -> record the pair.
                        duplicates.append({
                            'hash': file_hash,
                            'original': str(file_hashes[file_hash].relative_to(self.project_root)),
                            'duplicate': str(file_path.relative_to(self.project_root)),
                            'size': file_path.stat().st_size
                        })
                    else:
                        file_hashes[file_hash] = file_path
                except Exception:
                    # Unreadable files (permissions, races) are skipped.
                    continue
        return duplicates

    def _find_empty_directories(self) -> List[str]:
        """Find directories with no visible (non-dot) entries.

        A directory holding only hidden files (e.g. .DS_Store) is still
        considered empty by this pass.

        Returns:
            Project-relative paths of empty directories.
        """
        print("📁 查找空目录...")
        empty_dirs = []
        for dir_path in self.project_root.rglob('*'):
            if dir_path.is_dir():
                try:
                    contents = [item for item in dir_path.iterdir() if not item.name.startswith('.')]
                    if not contents:
                        empty_dirs.append(str(dir_path.relative_to(self.project_root)))
                except Exception:
                    continue
        return empty_dirs

    def _find_large_files(self, size_threshold: int = 10 * 1024 * 1024) -> List[Dict]:
        """Find files larger than a threshold.

        Args:
            size_threshold: Size threshold in bytes, default 10 MB.

        Returns:
            Records sorted by size, largest first.
        """
        print("📏 查找大文件...")
        large_files = []
        for file_path in self.project_root.rglob('*'):
            if file_path.is_file():
                try:
                    size = file_path.stat().st_size
                    if size > size_threshold:
                        large_files.append({
                            'file': str(file_path.relative_to(self.project_root)),
                            'size': size,
                            'size_mb': round(size / (1024 * 1024), 2),
                            'type': file_path.suffix or 'no_extension'
                        })
                except Exception:
                    continue
        # Largest files first.
        large_files.sort(key=lambda x: x['size'], reverse=True)
        return large_files

    def _find_temporary_files(self) -> List[str]:
        """Find temporary/cache/backup files by filename pattern.

        Returns:
            Project-relative paths of files matching any temp pattern.
        """
        print("🗑️ 查找临时文件...")
        temp_patterns = [
            r'.*\.tmp$', r'.*\.temp$', r'.*\.bak$', r'.*\.backup$',
            r'.*~$', r'.*\.swp$', r'.*\.swo$', r'.*\.log$',
            r'.*\.cache$', r'.*\.pid$', r'.*\.lock$', r'.*\.orig$',
            r'.*\.rej$', r'.*\.pyc$', r'.*\.pyo$', r'.*\.pyd$',
            r'.*__pycache__.*', r'.*\.DS_Store$', r'.*Thumbs\.db$',
            r'.*\.coverage$', r'.*\.pytest_cache.*', r'.*\.mypy_cache.*',
            r'.*\.tox.*', r'.*\.nox.*'
        ]
        temp_files = []
        for file_path in self.project_root.rglob('*'):
            if file_path.is_file():
                file_str = str(file_path.relative_to(self.project_root))
                for pattern in temp_patterns:
                    if re.match(pattern, file_str, re.IGNORECASE):
                        temp_files.append(file_str)
                        break
        return temp_files

    def _find_redundant_configs(self) -> List[Dict]:
        """Find categories of config files that appear more than once.

        Returns:
            One record per config category with >1 matching file, including
            a consolidation suggestion.
        """
        print("⚙️ 查找冗余配置文件...")
        config_files = {
            'mcp_config': [],
            'server_config': [],
            'test_config': [],
            'requirements': []
        }
        # Bucket files by name heuristics.
        for file_path in self.project_root.rglob('*'):
            if file_path.is_file():
                name = file_path.name.lower()
                if 'mcp_config' in name and name.endswith('.json'):
                    config_files['mcp_config'].append(str(file_path.relative_to(self.project_root)))
                elif 'server' in name and (name.endswith('.py') or name.endswith('.json')):
                    config_files['server_config'].append(str(file_path.relative_to(self.project_root)))
                elif 'test' in name and name.endswith('.py'):
                    config_files['test_config'].append(str(file_path.relative_to(self.project_root)))
                elif 'requirements' in name and name.endswith('.txt'):
                    config_files['requirements'].append(str(file_path.relative_to(self.project_root)))
        # Any category with more than one file is considered redundant.
        redundant = []
        for config_type, files in config_files.items():
            if len(files) > 1:
                redundant.append({
                    'type': config_type,
                    'files': files,
                    'count': len(files),
                    'suggestion': self._get_config_consolidation_suggestion(config_type, files)
                })
        return redundant

    def _find_unused_assets(self) -> List[str]:
        """Find asset files whose filename is never mentioned in code files.

        NOTE: the reference check is a plain substring match on the bare
        filename, so dynamically-built references will be missed (false
        positives possible).

        Returns:
            Project-relative paths of apparently unreferenced assets.
        """
        print("🖼️ 查找未使用的资源文件...")
        asset_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.css', '.js', '.html'}
        assets = []
        for file_path in self.project_root.rglob('*'):
            if file_path.is_file() and file_path.suffix.lower() in asset_extensions:
                assets.append(file_path)
        # Scan code/doc files for references to each asset's filename.
        referenced_assets = set()
        code_extensions = {'.py', '.html', '.css', '.js', '.md', '.json', '.yaml', '.yml'}
        for code_file in self.project_root.rglob('*'):
            if code_file.is_file() and code_file.suffix.lower() in code_extensions:
                try:
                    with open(code_file, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()
                    for asset in assets:
                        asset_name = asset.name
                        if asset_name in content:
                            referenced_assets.add(asset)
                except Exception:
                    continue
        # Everything not referenced (minus a few well-known special files).
        unused_assets = []
        for asset in assets:
            if asset not in referenced_assets:
                if asset.name not in ['favicon.ico', 'robots.txt', 'sitemap.xml']:
                    unused_assets.append(str(asset.relative_to(self.project_root)))
        return unused_assets

    def _analyze_structure_patterns(self) -> List[Dict]:
        """Analyze layout patterns and produce restructuring suggestions.

        Returns:
            List of suggestion records (type, description, details, advice).
        """
        print("🏗️ 分析项目结构模式...")
        suggestions = []
        # Check for a conventional directory skeleton.
        expected_dirs = ['src', 'tests', 'docs', 'config', 'scripts']
        missing_dirs = []
        for expected_dir in expected_dirs:
            if not (self.project_root / expected_dir).exists():
                missing_dirs.append(expected_dir)
        if missing_dirs:
            suggestions.append({
                'type': 'missing_directories',
                'description': '缺少标准目录结构',
                'missing': missing_dirs,
                'suggestion': '考虑创建标准的项目目录结构'
            })
        # Too many Python files sitting directly in the root?
        root_py_files = [f for f in self.project_root.glob('*.py') if f.is_file()]
        if len(root_py_files) > 5:
            suggestions.append({
                'type': 'too_many_root_files',
                'description': '根目录下Python文件过多',
                'count': len(root_py_files),
                'files': [f.name for f in root_py_files],
                'suggestion': '考虑将Python文件移动到src/或适当的子目录中'
            })
        # Test files scattered outside tests/ or test/ directories?
        test_files = list(self.project_root.rglob('test_*.py')) + list(self.project_root.rglob('*_test.py'))
        scattered_tests = [f for f in test_files if not any(part in ['tests', 'test'] for part in f.parts)]
        if scattered_tests:
            suggestions.append({
                'type': 'scattered_test_files',
                'description': '测试文件分散在项目中',
                'count': len(scattered_tests),
                'files': [str(f.relative_to(self.project_root)) for f in scattered_tests],
                'suggestion': '考虑将所有测试文件移动到tests/目录中'
            })
        # Too many config files in the root?
        config_files = []
        for pattern in ['*.json', '*.yaml', '*.yml', '*.toml', '*.ini', '*.cfg']:
            config_files.extend(self.project_root.glob(pattern))
        root_config_files = [f for f in config_files if f.parent == self.project_root]
        if len(root_config_files) > 3:
            suggestions.append({
                'type': 'too_many_root_configs',
                'description': '根目录下配置文件过多',
                'count': len(root_config_files),
                'files': [f.name for f in root_config_files],
                'suggestion': '考虑将配置文件移动到config/目录中'
            })
        return suggestions

    def _get_config_consolidation_suggestion(self, config_type: str, files: List[str]) -> str:
        """Return a consolidation hint for a redundant config category.

        Args:
            config_type: Category key from _find_redundant_configs().
            files: Matching file list (currently unused in the lookup).

        Returns:
            A human-readable suggestion string.
        """
        suggestions = {
            'mcp_config': '保留一个主要的MCP配置文件,将其他配置合并或移动到config/目录',
            'server_config': '整合服务器配置,保留一个主服务器文件',
            'test_config': '将测试文件移动到tests/目录并整合重复的测试',
            'requirements': '保留requirements.txt,将其他依赖文件整合到pyproject.toml'
        }
        return suggestions.get(config_type, '考虑整合重复的配置文件')

    def optimize_structure(self, analysis: Dict, auto_fix: bool = False) -> Dict:
        """Apply safe automatic fixes and collect manual follow-ups.

        Args:
            analysis: Result dict from analyze_structure().
            auto_fix: When True, back up key files then delete temp files,
                remove empty dirs and create missing standard dirs.

        Returns:
            Dict with actions taken, manual actions needed and backup flag.
        """
        print("🔧 开始结构优化...")
        optimization_results = {
            'timestamp': datetime.now().isoformat(),
            'actions_taken': [],
            'manual_actions_needed': [],
            'backup_created': False
        }
        if auto_fix:
            # Safety net before any destructive operation.
            self._create_backup()
            optimization_results['backup_created'] = True
            # Remove temporary files.
            for temp_file in analysis['temporary_files']:
                try:
                    file_path = self.project_root / temp_file
                    if file_path.exists():
                        file_path.unlink()
                        optimization_results['actions_taken'].append(f"删除临时文件: {temp_file}")
                except Exception as e:
                    print(f"⚠️ 无法删除临时文件 {temp_file}: {e}")
            # Fix: delete deepest directories first so that nested empty
            # directories are removed before their (then-empty) parents;
            # unsorted rglob order could leave non-removable parents behind.
            for empty_dir in sorted(analysis['empty_directories'],
                                    key=lambda d: len(Path(d).parts), reverse=True):
                try:
                    dir_path = self.project_root / empty_dir
                    if dir_path.exists() and dir_path.is_dir():
                        dir_path.rmdir()
                        optimization_results['actions_taken'].append(f"删除空目录: {empty_dir}")
                except Exception as e:
                    print(f"⚠️ 无法删除空目录 {empty_dir}: {e}")
            # Create any missing standard directories.
            for suggestion in analysis['structure_suggestions']:
                if suggestion['type'] == 'missing_directories':
                    for missing_dir in suggestion['missing']:
                        try:
                            dir_path = self.project_root / missing_dir
                            dir_path.mkdir(exist_ok=True)
                            optimization_results['actions_taken'].append(f"创建目录: {missing_dir}")
                        except Exception as e:
                            print(f"⚠️ 无法创建目录 {missing_dir}: {e}")
        # Items that always need a human decision, regardless of auto_fix.
        for duplicate in analysis['duplicate_files']:
            optimization_results['manual_actions_needed'].append(
                f"手动检查重复文件: {duplicate['original']} vs {duplicate['duplicate']}"
            )
        for large_file in analysis['large_files'][:5]:  # top 5 largest only
            optimization_results['manual_actions_needed'].append(
                f"检查大文件: {large_file['file']} ({large_file['size_mb']} MB)"
            )
        for redundant in analysis['redundant_configs']:
            optimization_results['manual_actions_needed'].append(
                f"整合冗余配置: {redundant['type']} - {redundant['suggestion']}"
            )
        return optimization_results

    def _create_backup(self):
        """Copy a fixed list of important files/dirs into a timestamped backup."""
        print("💾 创建项目备份...")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = self.backup_dir / f"backup_{timestamp}"
        backup_path.mkdir(parents=True, exist_ok=True)
        # Only these well-known entries are backed up, not the whole tree.
        important_files = [
            'requirements.txt',
            'pyproject.toml',
            'README.md',
            'config/',
            'scripts/'
        ]
        for item in important_files:
            source = self.project_root / item
            if source.exists():
                dest = backup_path / item
                try:
                    if source.is_file():
                        dest.parent.mkdir(parents=True, exist_ok=True)
                        shutil.copy2(source, dest)
                    elif source.is_dir():
                        shutil.copytree(source, dest, dirs_exist_ok=True)
                except Exception as e:
                    print(f"⚠️ 备份失败 {item}: {e}")
        print(f"✅ 备份已创建: {backup_path}")

    def generate_optimization_report(self, analysis: Dict, optimization_results: Dict = None) -> str:
        """Render a Markdown report from analysis (and optional optimization) data.

        Args:
            analysis: Result dict from analyze_structure().
            optimization_results: Optional result from optimize_structure().

        Returns:
            The report as a Markdown string.
        """
        report = f"""
# 项目结构优化报告

生成时间: {analysis['timestamp']}

## 分析概览
- 重复文件: {len(analysis['duplicate_files'])} 组
- 空目录: {len(analysis['empty_directories'])} 个
- 大文件: {len(analysis['large_files'])} 个
- 临时文件: {len(analysis['temporary_files'])} 个
- 冗余配置: {len(analysis['redundant_configs'])} 组
- 未使用资源: {len(analysis['unused_assets'])} 个
- 结构建议: {len(analysis['structure_suggestions'])} 项

## 重复文件详情
"""
        for duplicate in analysis['duplicate_files']:
            report += f"""
### 重复文件组 (大小: {duplicate['size']} 字节)
- 原文件: {duplicate['original']}
- 重复文件: {duplicate['duplicate']}
"""
        report += "\n## 大文件列表\n"
        for large_file in analysis['large_files'][:10]:  # top 10 only
            report += f"- {large_file['file']}: {large_file['size_mb']} MB\n"
        report += "\n## 临时文件列表\n"
        for temp_file in analysis['temporary_files'][:20]:  # first 20 only
            report += f"- {temp_file}\n"
        if len(analysis['temporary_files']) > 20:
            report += f"... 还有 {len(analysis['temporary_files']) - 20} 个临时文件\n"
        report += "\n## 冗余配置详情\n"
        for redundant in analysis['redundant_configs']:
            report += f"""
### {redundant['type']} ({redundant['count']} 个文件)
文件列表:
"""
            for file in redundant['files']:
                report += f"- {file}\n"
            report += f"**建议**: {redundant['suggestion']}\n\n"
        report += "\n## 结构优化建议\n"
        for suggestion in analysis['structure_suggestions']:
            report += f"""
### {suggestion['description']}
- **类型**: {suggestion['type']}
- **建议**: {suggestion['suggestion']}
"""
        if optimization_results:
            report += f"""
## 优化执行结果

执行时间: {optimization_results['timestamp']}
备份创建: {'是' if optimization_results['backup_created'] else '否'}

### 已执行的操作
"""
            for action in optimization_results['actions_taken']:
                report += f"- {action}\n"
            report += "\n### 需要手动处理的项目\n"
            for action in optimization_results['manual_actions_needed']:
                report += f"- {action}\n"
        return report

    def save_optimization_report(self, analysis: Dict, optimization_results: Dict = None,
                                 output_file: str = None):
        """Write the Markdown report plus a JSON twin with the raw data.

        Args:
            analysis: Result dict from analyze_structure().
            optimization_results: Optional result from optimize_structure().
            output_file: Destination path; defaults to
                <project_root>/structure_optimization_report.md.
        """
        if output_file is None:
            output_file = self.project_root / "structure_optimization_report.md"
        report = self.generate_optimization_report(analysis, optimization_results)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"📊 优化报告已保存到: {output_file}")
        # Also persist the raw data in JSON form next to the report.
        json_data = {
            'analysis': analysis,
            'optimization_results': optimization_results
        }
        # Fix: derive the JSON path by swapping the suffix instead of
        # str.replace('.md', '.json'), which would corrupt any path that
        # contains '.md' elsewhere (e.g. a directory named 'docs.md').
        json_file = Path(output_file).with_suffix('.json')
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2, ensure_ascii=False)
        print(f"📊 详细数据已保存到: {json_file}")


def main():
    """CLI entry point: analyze, auto-optimize and report on the cwd project."""
    project_root = Path.cwd()
    print("🚀 开始项目结构优化...")
    optimizer = StructureOptimizer(str(project_root))
    # Analyze, then apply only the safe automatic fixes.
    analysis = optimizer.analyze_structure()
    optimization_results = optimizer.optimize_structure(analysis, auto_fix=True)
    optimizer.save_optimization_report(analysis, optimization_results)
    # Console summary.
    print("\n📊 结构分析摘要:")
    print(f"- 重复文件: {len(analysis['duplicate_files'])} 组")
    print(f"- 空目录: {len(analysis['empty_directories'])} 个")
    print(f"- 大文件: {len(analysis['large_files'])} 个")
    print(f"- 临时文件: {len(analysis['temporary_files'])} 个")
    print(f"- 冗余配置: {len(analysis['redundant_configs'])} 组")
    print(f"- 未使用资源: {len(analysis['unused_assets'])} 个")
    print("\n🔧 优化执行摘要:")
    print(f"- 已执行操作: {len(optimization_results['actions_taken'])} 项")
    print(f"- 需手动处理: {len(optimization_results['manual_actions_needed'])} 项")
    print(f"- 备份已创建: {'是' if optimization_results['backup_created'] else '否'}")


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Lillard01/chatExcel-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server