Skip to main content
Glama
lin2000wl
by lin2000wl
file_system.py11.4 kB
import glob
import os
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import NamedTuple

import pathspec
from pathspec import PathSpec


class ScanResult(NamedTuple):
    """Result of scanning a directory."""

    directories: list[str]
    files: list[str]


def scan_directory(
    path: str,
    recursive: bool = False,
    relative_to: str | None = None,
    is_ignored_dir: Callable[[str], bool] = lambda x: False,
    is_ignored_file: Callable[[str], bool] = lambda x: False,
) -> ScanResult:
    """
    Scan a directory and collect the directories and files found in it.

    Ignored directories are neither reported nor descended into.

    :param path: the path to scan
    :param recursive: whether to recursively scan subdirectories
    :param relative_to: the path to which the results should be relative to; if None, provide absolute paths
    :param is_ignored_dir: a function with which to determine whether the given directory (abs. path) shall be ignored
    :param is_ignored_file: a function with which to determine whether the given file (abs. path) shall be ignored
    :return: the list of directories and files
    """
    files: list[str] = []
    directories: list[str] = []

    abs_path = os.path.abspath(path)
    rel_base = os.path.abspath(relative_to) if relative_to else None

    with os.scandir(abs_path) as entries:
        for entry in entries:
            # entry.path is absolute because the scanned directory is absolute
            entry_path = entry.path
            result_path = os.path.relpath(entry_path, rel_base) if rel_base else entry_path

            if entry.is_file():
                if not is_ignored_file(entry_path):
                    files.append(result_path)
            elif entry.is_dir():
                if not is_ignored_dir(entry_path):
                    directories.append(result_path)
                    if recursive:
                        sub_result = scan_directory(
                            entry_path,
                            recursive=True,
                            relative_to=relative_to,
                            is_ignored_dir=is_ignored_dir,
                            is_ignored_file=is_ignored_file,
                        )
                        files.extend(sub_result.files)
                        directories.extend(sub_result.directories)

    return ScanResult(directories, files)


def find_all_non_ignored_files(repo_root: str) -> list[str]:
    """
    Find all non-ignored files in the repository, respecting all gitignore files in the repository.

    :param repo_root: The root directory of the repository
    :return: A list of all non-ignored files in the repository (absolute paths)
    """
    gitignore_parser = GitignoreParser(repo_root)
    _, files = scan_directory(repo_root, recursive=True)
    return [file for file in files if not gitignore_parser.should_ignore(file)]


@dataclass
class GitignoreSpec:
    file_path: str
    """Path to the gitignore file."""
    patterns: list[str] = field(default_factory=list)
    """List of patterns from the gitignore file.
    The patterns are adjusted based on the gitignore file location.
    """
    pathspec: PathSpec = field(init=False)
    """Compiled PathSpec object for pattern matching."""

    def __post_init__(self) -> None:
        """Initialize the PathSpec from patterns."""
        self.pathspec = PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, self.patterns)

    def matches(self, relative_path: str) -> bool:
        """
        Check if the given path matches any pattern in this gitignore spec.

        :param relative_path: Path to check (should be relative to repo root)
        :return: True if path matches any pattern
        """
        # NOTE(review): root_path is the directory of the gitignore file, whereas relative_path is
        # documented as relative to the repo root. For nested gitignore files these bases differ, so
        # the directory check inside match_path may look at a different location. Callers that need
        # directory semantics should pass a path ending in "/" (GitignoreParser.should_ignore does).
        return match_path(relative_path, self.pathspec, root_path=os.path.dirname(self.file_path))


class GitignoreParser:
    """
    Parser for gitignore files in a repository.

    This class handles parsing multiple gitignore files throughout a repository
    and provides methods to check if paths should be ignored.
    """

    def __init__(self, repo_root: str) -> None:
        """
        Initialize the parser for a repository.

        :param repo_root: Root directory of the repository
        """
        self.repo_root = os.path.abspath(repo_root)
        self.ignore_specs: list[GitignoreSpec] = []
        self._load_gitignore_files()

    def _load_gitignore_files(self) -> None:
        """Load all gitignore files from the repository."""
        for gitignore_file in self._find_gitignore_files():
            spec = self._create_ignore_spec(gitignore_file)
            if spec.patterns:  # Only add non-empty specs
                self.ignore_specs.append(spec)

    def _find_gitignore_files(self) -> list[str]:
        """
        Find all .gitignore files in the repository.

        :return: List of absolute paths to .gitignore files
        """
        # "**" only spans zero or more directory levels when recursive=True is passed; without it,
        # it behaves like a single "*" and gitignore files deeper than one level are never found.
        # The repo root is escaped so that glob metacharacters in its path are taken literally.
        pattern = os.path.join(glob.escape(self.repo_root), "**", ".gitignore")
        return glob.glob(pattern, recursive=True)

    def _create_ignore_spec(self, gitignore_file_path: str) -> GitignoreSpec:
        """
        Create a GitignoreSpec from a single gitignore file.

        :param gitignore_file_path: Path to the .gitignore file
        :return: GitignoreSpec object for the gitignore patterns
        """
        try:
            with open(gitignore_file_path, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError):
            # If we can't read the file, return an empty spec (deliberate best effort)
            return GitignoreSpec(gitignore_file_path, [])

        gitignore_dir = os.path.dirname(gitignore_file_path)
        patterns = self._parse_gitignore_content(content, gitignore_dir)
        return GitignoreSpec(gitignore_file_path, patterns)

    def _parse_gitignore_content(self, content: str, gitignore_dir: str) -> list[str]:
        """
        Parse gitignore content and adjust patterns based on the gitignore file location.

        The returned patterns are expressed relative to the repository root, so that patterns
        from nested gitignore files only apply below the directory containing that file.
        A pattern whose first character is an escaped hash or exclamation mark stays escaped
        in the output, so it is neither dropped as a comment nor turned into a negation.

        :param content: Content of the .gitignore file
        :param gitignore_dir: Directory containing the .gitignore file (absolute path)
        :return: List of adjusted patterns
        """
        patterns: list[str] = []

        # Relative path from repo root to the gitignore directory ("" for the root itself).
        rel_dir = os.path.relpath(gitignore_dir, self.repo_root)
        if rel_dir == ".":
            rel_dir = ""
        # gitignore patterns use forward slashes. Only the directory prefix is converted, so that
        # backslash escapes inside the pattern text are left untouched on every platform.
        rel_dir = rel_dir.replace(os.sep, "/")

        for line in content.splitlines():
            # Strip trailing whitespace (but preserve leading whitespace for now)
            line = line.rstrip()

            # Skip empty lines and comments
            if not line or line.lstrip().startswith("#"):
                continue

            is_negation = False
            if line.startswith(("\\#", "\\!")):
                # Escaped literal "#" / "!": drop the backslash for processing. This must not be
                # followed by the negation check, otherwise "\!name" would be read as "!name".
                line = line[1:]
            elif line.startswith("!"):
                is_negation = True
                line = line[1:]

            # Strip leading/trailing whitespace after removing the negation marker
            line = line.strip()
            if not line:
                continue

            # Determine if pattern is anchored to the gitignore directory
            is_anchored = "/" in line[:-1] or line.startswith("/")

            # Remove leading slash for processing
            if line.startswith("/"):
                line = line[1:]
            if not line:
                # A lone "/" carries no pattern; emitting "<dir>/" for it would ignore the whole directory.
                continue

            if rel_dir:
                if is_anchored or line.startswith("**/"):
                    # Relative to the gitignore directory; a leading "**/" already allows any depth
                    # below it and must stay scoped to that directory.
                    adjusted_pattern = f"{rel_dir}/{line}"
                else:
                    # Non-anchored patterns can match at any depth below the gitignore directory
                    adjusted_pattern = f"{rel_dir}/**/{line}"
            elif is_anchored:
                # Anchored patterns in the root should only match at root level
                adjusted_pattern = "/" + line
            else:
                # Non-anchored patterns in the root can match anywhere
                adjusted_pattern = line

            # Restore the escape if the final pattern would otherwise start with a special character
            if adjusted_pattern.startswith(("#", "!")):
                adjusted_pattern = "\\" + adjusted_pattern

            # Re-add negation if needed
            if is_negation:
                adjusted_pattern = "!" + adjusted_pattern

            patterns.append(adjusted_pattern)

        return patterns

    def should_ignore(self, path: str) -> bool:
        """
        Check if a path should be ignored based on the gitignore rules.

        :param path: Path to check (absolute or relative to repo_root)
        :return: True if the path should be ignored, False otherwise
        """
        # Convert to relative path from repo root
        if os.path.isabs(path):
            rel_path = os.path.relpath(path, self.repo_root)
        else:
            rel_path = path
        abs_path = os.path.join(self.repo_root, rel_path)

        # Normalize path separators
        rel_path = rel_path.replace(os.sep, "/")

        # Mark existing directories with a trailing slash so that directory patterns can match them
        if os.path.isdir(abs_path) and not rel_path.endswith("/"):
            rel_path += "/"

        # The path is ignored as soon as any of the loaded specs matches it
        return any(spec.matches(rel_path) for spec in self.ignore_specs)

    def get_ignore_specs(self) -> list[GitignoreSpec]:
        """
        Get all loaded gitignore specs.

        :return: List of GitignoreSpec objects
        """
        return self.ignore_specs

    def reload(self) -> None:
        """Reload all gitignore files from the repository."""
        self.ignore_specs.clear()
        self._load_gitignore_files()


def match_path(relative_path: str, path_spec: PathSpec, root_path: str = "") -> bool:
    """
    Match a relative path against a given pathspec. Just pathspec.match_file() is not enough,
    we need to do some massaging to fix issues with pathspec matching.

    :param relative_path: relative path to match against the pathspec
    :param path_spec: the pathspec to match against
    :param root_path: the root path from which the relative path is derived
    :return: True if the (normalized) path is matched by the pathspec, False otherwise
    """
    normalized_path = str(relative_path).replace(os.path.sep, "/")

    # We can have patterns like /src/..., which would only match corresponding paths from the repo root
    # Unfortunately, pathspec can't know whether a relative path is relative to the repo root or not,
    # so it will never match src/...
    # The fix is to just always assume that the input path is relative to the repo root and to
    # prefix it with /.
    if not normalized_path.startswith("/"):
        normalized_path = "/" + normalized_path

    # pathspec can't handle the matching of directories if they don't end with a slash!
    # see https://github.com/cpburnz/python-pathspec/issues/89
    abs_path = os.path.abspath(os.path.join(root_path, relative_path))
    if os.path.isdir(abs_path) and not normalized_path.endswith("/"):
        normalized_path = normalized_path + "/"

    return path_spec.match_file(normalized_path)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lin2000wl/Serena-cursor-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server