Skip to main content
Glama
directory_tool.py13.5 kB
#!/usr/bin/env python3 """ Directory Intelligence Tool A .gitignore-aware directory structure analyzer that generates XML hierarchies with intelligent summarization for large directories. Warning Taxonomy: - unreadable_file: File cannot be opened or read due to permissions or I/O errors - unreadable_directory: Directory cannot be accessed due to permissions or I/O errors - malformed_gitignore: .gitignore file was read successfully but contains invalid patterns - broken_symlink: Symbolic link cannot be resolved to a valid target - symlink_loop: Circular symlink reference detected - too_many_warnings: Warning count exceeded threshold, additional warnings appended """ import os import sys import pathspec from pathlib import Path from typing import Set, List, Tuple, Dict, Any import xml.etree.ElementTree as ET from xml.dom import minidom from fastmcp import FastMCP import logging logger = logging.getLogger(__name__) IGNORE_TOPLEVEL_DOTDIRS = True MAX_FILE_COUNT = 50 class DirectoryIntelligenceTool: """Main class for directory structure analysis and XML generation.""" def __init__(self, root_path: str = "."): """Initialize the tool with a root path.""" self.root_path = Path(root_path).resolve() self.visited_symlinks = set() self.warnings = [] self.config = self._load_config() def _load_config(self) -> Dict[str, Any]: """Load configuration from environment or config file.""" config = { 'max_file_count': MAX_FILE_COUNT, 'expand_large': False, } # Try to load from environment variables import os max_files_str = os.environ.get('DIRECTORY_TOOL_MAX_FILES') if max_files_str: try: config['max_file_count'] = int(max_files_str) except ValueError: pass expand_large_str = os.environ.get('DIRECTORY_TOOL_EXPAND_LARGE') if expand_large_str: config['expand_large'] = expand_large_str.lower() in ('true', '1', 'yes') return config def _sanitize_xml_name(self, name: str) -> str: """Sanitize a name for safe use as XML attribute value.""" # Intentionally restricts filenames/directories to alnum + ._- to guarantee XML attribute safety. # Nonconforming names become "unnamed" due to strictness. # If name is already valid, return it if name and all(c.isalnum() or c in ('.', '_', '-') for c in name): return name # Otherwise, use a safe placeholder return "unnamed" def _should_ignore(self, path: Path, ignore_spec: pathspec.PathSpec, depth: int) -> bool: """Determine if a path should be ignored based on all rules.""" # Check gitignore patterns if self.should_ignore_path(path, ignore_spec): return True # Check top-level dot directories if IGNORE_TOPLEVEL_DOTDIRS and depth == 1 and path.is_dir() and path.name.startswith('.'): return True return False def load_gitignore_patterns(self, directory: Path) -> pathspec.PathSpec: """Load .gitignore patterns from a directory.""" # Check if directory is readable try: if not os.access(directory, os.R_OK): self.warnings.append(f"unreadable_directory: {directory} - Permission denied") except Exception as e: self.warnings.append(f"unreadable_directory: {directory} - {str(e)}") gitignore_path = directory / ".gitignore" patterns = [] # Note: An unreadable .gitignore (I/O or permission failure) is classified under # the "unreadable file" category. A syntactically valid but unparsable pattern set # triggers the "malformed ignore file" category during pattern parsing. if gitignore_path.exists() and gitignore_path.is_file(): try: with open(gitignore_path, 'r', encoding='utf-8') as f: patterns = f.read().splitlines() except Exception as e: self.warnings.append(f"unreadable_file: {gitignore_path} - Permission denied or I/O error: {str(e)}") # Add default patterns to ignore common build/venv directories default_patterns = [ ".git", ".vscode", ".idea", "__pycache__", "*.pyc", "*.pyo", "*.pyd", ".pytest_cache", ".coverage", "htmlcov", ".tox", ".venv", "venv", "env", "ENV", "node_modules", ".DS_Store", "Thumbs.db", "*.egg-info", "dist", "build", "*.so", "*.dylib", "*.dll" ] patterns.extend(default_patterns) try: return pathspec.PathSpec.from_lines("gitwildmatch", patterns) except Exception as e: self.warnings.append(f"malformed_gitignore: {directory} — failed to parse .gitignore: {str(e)}") return pathspec.PathSpec.from_lines("gitwildmatch", []) def should_ignore_path(self, path: Path, ignore_spec: pathspec.PathSpec) -> bool: """Check if a path should be ignored based on gitignore patterns.""" relative_path = path.relative_to(self.root_path) return ignore_spec.match_file(str(relative_path)) def count_directory_files(self, directory: Path, ignore_spec: pathspec.PathSpec, depth: int = 0) -> int: """Count total files in a directory (recursively).""" count = 0 try: for item in directory.iterdir(): if self._should_ignore(item, ignore_spec, depth): logger.debug(f"Skipping ignored path: {item}") continue if item.is_file(): count += 1 elif item.is_dir(): count += self.count_directory_files(item, ignore_spec, depth + 1) except PermissionError: self.warnings.append(f"unreadable_directory: {directory} - Permission denied") except Exception as e: self.warnings.append(f"unreadable_directory: {directory} - {str(e)}") return count def scan_directory(self, directory: Path, ignore_spec: pathspec.PathSpec, expand_large: bool = False, depth: int = 0) -> ET.Element: """Scan a directory and create XML structure.""" dir_element = ET.Element("dir") dir_name = self._sanitize_xml_name(directory.name) dir_element.set("name", dir_name) try: items = list(directory.iterdir()) items.sort(key=lambda x: (x.is_file(), x.name.lower())) # Count files for summarization max_file_count = self.config.get('max_file_count', MAX_FILE_COUNT) if not expand_large and depth > 0: # Only summarize subdirectories total_files = self.count_directory_files(directory, ignore_spec, depth) if total_files > max_file_count: logger.debug(f"Directory {directory} has {total_files} files (threshold: {max_file_count}), adding summary") summary = ET.Element("summary") summary.set("count", str(total_files)) dir_element.append(summary) return dir_element # Process items in directory for item in items: # Check for symlink loops if item.is_symlink(): try: target = item.resolve() if target in self.visited_symlinks: self.warnings.append(f"symlink_loop: {item} -> {target}") continue self.visited_symlinks.add(target) except Exception as e: self.warnings.append(f"broken_symlink: {item} - {str(e)}") continue # Skip ignored paths using consolidated logic if self._should_ignore(item, ignore_spec, depth): logger.debug(f"Skipping ignored path: {item}") continue # Process files if item.is_file(): try: # Try to check if file is readable if not os.access(item, os.R_OK): self.warnings.append(f"unreadable_file: {item} - Permission denied") continue except Exception as e: self.warnings.append(f"unreadable_file: {item} - {str(e)}") continue file_element = ET.Element("file") safe_name = self._sanitize_xml_name(item.name) file_element.text = safe_name dir_element.append(file_element) # Process subdirectories recursively elif item.is_dir(): try: subdir_element = self.scan_directory(item, ignore_spec, expand_large, depth + 1) dir_element.append(subdir_element) except PermissionError: self.warnings.append(f"unreadable_directory: {item} - Permission denied") except Exception as e: self.warnings.append(f"unreadable_directory: {item} - {str(e)}") except PermissionError: self.warnings.append(f"unreadable_directory: {directory} - Permission denied") except Exception as e: self.warnings.append(f"unreadable_directory: {directory} - {str(e)}") return dir_element def generate_xml_structure(self, expand_large: bool = None) -> str: """Generate complete XML structure of the directory.""" self.warnings = [] self.visited_symlinks = set() # Use config default if expand_large not explicitly provided if expand_large is None: expand_large = self.config.get('expand_large', False) # Create root element root = ET.Element("project_structure") # Load gitignore patterns ignore_spec = self.load_gitignore_patterns(self.root_path) # Scan directory structure = self.scan_directory(self.root_path, ignore_spec, expand_large) root.append(structure) # Add warnings if any if self.warnings: # Check if too many warnings # Over-100 warnings are not truncated. Instead, a final warning # "too_many_warnings: truncated" is appended. This behavior matches # the test suite's expectations. if len(self.warnings) > 100: self.warnings.append("too_many_warnings: truncated") warnings_element = ET.Element("warnings") for warning in self.warnings: warning_element = ET.Element("warning") warning_element.text = warning warnings_element.append(warning_element) root.append(warnings_element) # Convert to pretty XML xml_str = ET.tostring(root, encoding='unicode') dom = minidom.parseString(xml_str) return dom.toprettyxml(indent=" ") # Initialize FastMCP mcp = FastMCP("Directory Intelligence Tool") # Note: MCP passes an explicit boolean for expand_large. # Therefore environment/config defaults do not affect calls from MCP unless the wrapper is changed. @mcp.tool() def get_codebase_structure(root_path: str = ".", expand_large: bool = False) -> str: """ Analyze directory structure and generate XML representation. Args: root_path: Root directory path to analyze (default: current directory) expand_large: If True, always expand directories regardless of file count Returns: XML string representing the directory structure """ try: tool = DirectoryIntelligenceTool(root_path) return tool.generate_xml_structure(expand_large) except Exception as e: error_xml = f"""<?xml version="1.0" ?> <project_structure> <error>{str(e)}</error> </project_structure>""" return error_xml def main(): """Main function for command-line usage.""" import argparse parser = argparse.ArgumentParser(description="Directory Intelligence Tool") parser.add_argument("path", nargs="?", default=".", help="Directory path to analyze") parser.add_argument("--expand-large", action="store_true", help="Expand all directories regardless of file count") parser.add_argument("--output", "-o", help="Output file path (default: stdout)") args = parser.parse_args() try: tool = DirectoryIntelligenceTool(args.path) xml_output = tool.generate_xml_structure(args.expand_large) if args.output: with open(args.output, 'w', encoding='utf-8') as f: f.write(xml_output) print(f"XML structure saved to: {args.output}") else: print(xml_output) except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/itstanner5216/EliteMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server