MCP Memory Service: scripts/find_orphaned_files.py

#!/usr/bin/env python3 """ Orphaned File Detection Script Finds files and directories that may be unused, redundant, or orphaned in the repository. This helps maintain a lean and clean codebase by identifying cleanup candidates. Usage: python scripts/find_orphaned_files.py python scripts/find_orphaned_files.py --include-safe-files python scripts/find_orphaned_files.py --verbose """ import os import re import argparse from pathlib import Path from typing import Set, List, Dict, Tuple from collections import defaultdict class OrphanDetector: def __init__(self, repo_root: Path, include_safe_files: bool = False, verbose: bool = False): self.repo_root = repo_root self.include_safe_files = include_safe_files self.verbose = verbose # Files/dirs to always ignore self.ignore_patterns = { '.git', '.venv', '__pycache__', '.pytest_cache', 'node_modules', '.DS_Store', '.gitignore', '.gitattributes', 'LICENSE', 'CHANGELOG.md', '*.pyc', '*.pyo', '*.egg-info', 'dist', 'build' } # Safe files that are commonly unreferenced but important self.safe_files = { 'README.md', 'pyproject.toml', 'uv.lock', 'setup.py', 'requirements.txt', 'Dockerfile', 'docker-compose.yml', '.dockerignore', 'Makefile', '__init__.py', 'main.py', 'server.py', 'config.py', 'settings.py' } # Extensions that are likely to be referenced self.code_extensions = {'.py', '.js', '.ts', '.sh', '.md', '.yml', '.yaml', '.json'} def should_ignore(self, path: Path) -> bool: """Check if a path should be ignored.""" path_str = str(path) for pattern in self.ignore_patterns: if pattern in path_str or path.name == pattern: return True return False def is_safe_file(self, path: Path) -> bool: """Check if a file is considered 'safe' (commonly unreferenced but important).""" return path.name in self.safe_files def find_all_files(self) -> List[Path]: """Find all files in the repository.""" all_files = [] for root, dirs, files in os.walk(self.repo_root): # Remove ignored directories from dirs list to skip them dirs[:] = [d for d in dirs if not any(ignore in d for ignore in self.ignore_patterns)] for file in files: file_path = Path(root) / file if not self.should_ignore(file_path): all_files.append(file_path) return all_files def extract_references(self, file_path: Path) -> Set[str]: """Extract potential file references from a file.""" references = set() try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() # Find various types of references patterns = [ # Python imports: from module import, import module r'(?:from\s+|import\s+)([a-zA-Z_][a-zA-Z0-9_.]*)', # File paths in quotes r'["\']([^"\']*\.[a-zA-Z0-9]+)["\']', # Common file references r'([a-zA-Z_][a-zA-Z0-9_.-]*\.[a-zA-Z0-9]+)', # Directory references r'([a-zA-Z_][a-zA-Z0-9_-]*/)(?:[a-zA-Z0-9_.-]+)', ] for pattern in patterns: matches = re.findall(pattern, content, re.MULTILINE) references.update(matches) except Exception as e: if self.verbose: print(f"Warning: Could not read {file_path}: {e}") return references def build_reference_map(self, files: List[Path]) -> Dict[str, Set[Path]]: """Build a map of what files reference what.""" reference_map = defaultdict(set) for file_path in files: if file_path.suffix in self.code_extensions: references = self.extract_references(file_path) for ref in references: reference_map[ref].add(file_path) return reference_map def find_orphaned_files(self) -> Tuple[List[Path], List[Path], List[Path]]: """Find potentially orphaned files.""" all_files = self.find_all_files() reference_map = self.build_reference_map(all_files) # Convert file 
paths to strings for easier matching file_names = {f.name for f in all_files} file_stems = {f.stem for f in all_files} file_paths = {str(f.relative_to(self.repo_root)) for f in all_files} potentially_orphaned = [] safe_unreferenced = [] directories_to_check = [] for file_path in all_files: rel_path = file_path.relative_to(self.repo_root) file_name = file_path.name file_stem = file_path.stem # Check if file is referenced is_referenced = False # Check various forms of references reference_forms = [ file_name, file_stem, str(rel_path), str(rel_path).replace('/', '.'), # Python module style file_stem.replace('_', '-'), # kebab-case variants file_stem.replace('-', '_'), # snake_case variants ] for form in reference_forms: if form in reference_map and reference_map[form]: is_referenced = True break # Special checks for Python files if file_path.suffix == '.py': # Check if it's imported as a module module_path = str(rel_path).replace('/', '.').replace('.py', '') if module_path in reference_map: is_referenced = True # Categorize unreferenced files if not is_referenced: if self.is_safe_file(file_path) and not self.include_safe_files: safe_unreferenced.append(file_path) else: potentially_orphaned.append(file_path) # Check for empty directories for root, dirs, files in os.walk(self.repo_root): dirs[:] = [d for d in dirs if not any(ignore in d for ignore in self.ignore_patterns)] if not dirs and not files: # Empty directory empty_dir = Path(root) if not self.should_ignore(empty_dir): directories_to_check.append(empty_dir) return potentially_orphaned, safe_unreferenced, directories_to_check def find_duplicate_files(self) -> Dict[str, List[Path]]: """Find files with identical names that might be duplicates.""" all_files = self.find_all_files() name_groups = defaultdict(list) for file_path in all_files: name_groups[file_path.name].append(file_path) # Only return groups with multiple files return {name: paths for name, paths in name_groups.items() if len(paths) > 1} def analyze_config_files(self) -> List[Tuple[Path, str]]: """Find potentially redundant configuration files.""" all_files = self.find_all_files() config_files = [] config_patterns = [ (r'.*requirements.*\.txt$', 'Requirements file'), (r'.*requirements.*\.lock$', 'Requirements lock'), (r'.*package.*\.json$', 'Package.json'), (r'.*package.*lock.*\.json$', 'Package lock'), (r'.*\.lock$', 'Lock file'), (r'.*config.*\.(py|json|yaml|yml)$', 'Config file'), (r'.*settings.*\.(py|json|yaml|yml)$', 'Settings file'), (r'.*\.env.*', 'Environment file'), ] for file_path in all_files: rel_path = str(file_path.relative_to(self.repo_root)) for pattern, description in config_patterns: if re.match(pattern, rel_path, re.IGNORECASE): config_files.append((file_path, description)) break return config_files def generate_report(self): """Generate a comprehensive orphan detection report.""" print("🔍 ORPHANED FILE DETECTION REPORT") print("=" * 60) orphaned, safe_unreferenced, empty_dirs = self.find_orphaned_files() duplicates = self.find_duplicate_files() config_files = self.analyze_config_files() # Potentially orphaned files if orphaned: print(f"\n❌ POTENTIALLY ORPHANED FILES ({len(orphaned)}):") for file_path in sorted(orphaned): rel_path = file_path.relative_to(self.repo_root) print(f" 📄 {rel_path}") else: print(f"\n✅ No potentially orphaned files found!") # Safe unreferenced files (if requested) if self.include_safe_files and safe_unreferenced: print(f"\n🟡 SAFE UNREFERENCED FILES ({len(safe_unreferenced)}):") print(" (These are commonly unreferenced but usually 
important)") for file_path in sorted(safe_unreferenced): rel_path = file_path.relative_to(self.repo_root) print(f" 📄 {rel_path}") # Empty directories if empty_dirs: print(f"\n📁 EMPTY DIRECTORIES ({len(empty_dirs)}):") for dir_path in sorted(empty_dirs): rel_path = dir_path.relative_to(self.repo_root) print(f" 📁 {rel_path}") # Duplicate file names if duplicates: print(f"\n👥 DUPLICATE FILE NAMES ({len(duplicates)} groups):") for name, paths in sorted(duplicates.items()): print(f" 📄 {name}:") for path in sorted(paths): rel_path = path.relative_to(self.repo_root) print(f" - {rel_path}") # Configuration files analysis if config_files: print(f"\n⚙️ CONFIGURATION FILES ({len(config_files)}):") print(" (Review for redundancy)") config_by_type = defaultdict(list) for path, desc in config_files: config_by_type[desc].append(path) for desc, paths in sorted(config_by_type.items()): print(f" {desc}:") for path in sorted(paths): rel_path = path.relative_to(self.repo_root) print(f" - {rel_path}") print(f"\n" + "=" * 60) print(f"📊 SUMMARY:") print(f"Potentially orphaned files: {len(orphaned)}") print(f"Empty directories: {len(empty_dirs)}") print(f"Duplicate name groups: {len(duplicates)}") print(f"Configuration files: {len(config_files)}") if orphaned or empty_dirs: print(f"\n⚠️ Review these files carefully before deletion!") print(f"Some may be important despite not being directly referenced.") else: print(f"\n✅ Repository appears clean with no obvious orphans!") def main(): parser = argparse.ArgumentParser(description='Find orphaned files in the repository') parser.add_argument('--include-safe-files', '-s', action='store_true', help='Include commonly unreferenced but safe files in report') parser.add_argument('--verbose', '-v', action='store_true', help='Show verbose output including warnings') args = parser.parse_args() repo_root = Path(__file__).parent.parent detector = OrphanDetector(repo_root, args.include_safe_files, args.verbose) detector.generate_report() if __name__ == "__main__": main()
