import fnmatch
import functools
import re
#
# Globbing helpers
#
@functools.cache
def _is_case_sensitive(flavour):
return flavour.normcase("Aa") == "Aa"
# fnmatch.translate() returns a regular expression that includes a prefix and
# a suffix, which enable matching newlines and ensure the end of the string is
# matched, respectively. These features are undesirable for our implementation
# of PurePath.match(), which represents path separators as newlines and joins
# pattern segments together. As a workaround, we define a slice object that
# can remove the prefix and suffix from any translate() result. See the
# _compile_pattern_lines() function for more details.
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate("_").split("_")
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))
_SWAP_SEP_AND_NEWLINE = {
"/": str.maketrans({"/": "\n", "\n": "/"}),
"\\": str.maketrans({"\\": "\n", "\n": "\\"}),
}
@functools.lru_cache()
def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
if not pat:
return _TerminatingSelector()
if pat == "**":
child_parts_idx = 1
while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == "**":
child_parts_idx += 1
child_parts = pattern_parts[child_parts_idx:]
if "**" in child_parts:
cls = _DoubleRecursiveWildcardSelector
else:
cls = _RecursiveWildcardSelector
else:
child_parts = pattern_parts[1:]
if pat == "..":
cls = _ParentSelector
elif "**" in pat:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
else:
cls = _WildcardSelector
return cls(pat, child_parts, flavour, case_sensitive)
@functools.lru_cache(maxsize=256)
def _compile_pattern(pat, case_sensitive):
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
return re.compile(fnmatch.translate(pat), flags).match
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
"""Compile the given pattern lines to an `re.Pattern` object.
The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with
its path separators and newlines swapped (e.g. '*\n*.py`). By using
newlines to separate path components, and not setting `re.DOTALL`, we
ensure that the `*` wildcard cannot match path separators.
The returned `re.Pattern` object may have its `match()` method called to
match a complete pattern, or `search()` to match from the right. The
argument supplied to these methods must also have its path separators and
newlines swapped.
"""
# Match the start of the path, or just after a path separator
parts = ["^"]
for part in pattern_lines.splitlines(keepends=True):
if part == "*\n":
part = r".+\n"
elif part == "*":
part = r".+"
else:
# Any other component: pass to fnmatch.translate(). We slice off
# the common prefix and suffix added by translate() to ensure that
# re.DOTALL is not set, and the end of the string not matched,
# respectively. With DOTALL not set, '*' wildcards will not match
# path separators, because the '.' characters in the pattern will
# not match newlines.
part = fnmatch.translate(part)[_FNMATCH_SLICE]
parts.append(part)
# Match the end of the path, always.
parts.append(r"\Z")
flags = re.MULTILINE
if not case_sensitive:
flags |= re.IGNORECASE
return re.compile("".join(parts), flags=flags)
class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""
def __init__(self, child_parts, flavour, case_sensitive):
self.child_parts = child_parts
if child_parts:
self.successor = _make_selector(child_parts, flavour, case_sensitive)
self.dironly = True
else:
self.successor = _TerminatingSelector()
self.dironly = False
def select_from(self, parent_path):
"""Iterate over all child paths of `parent_path` matched by this
selector. This can contain parent_path itself."""
path_cls = type(parent_path)
scandir = path_cls._scandir
if not parent_path.is_dir():
return iter([])
return self._select_from(parent_path, scandir)
class _TerminatingSelector:
def _select_from(self, parent_path, scandir):
yield parent_path
class _ParentSelector(_Selector):
    """Selector for a '..' pattern part: it steps to the '..' entry of the
    given path and lets the successor continue from there."""

    def __init__(self, name, child_parts, flavour, case_sensitive):
        # *name* is accepted so the constructor signature lines up with the
        # other selector classes; it is not stored.
        super().__init__(child_parts, flavour, case_sensitive)

    def _select_from(self, parent_path, scandir):
        dotdot = parent_path._make_child_relpath("..")
        yield from self.successor._select_from(dotdot, scandir)
class _WildcardSelector(_Selector):
    """Selector that matches the names of a directory's entries against a
    single glob pattern part."""

    def __init__(self, pat, child_parts, flavour, case_sensitive):
        super().__init__(child_parts, flavour, case_sensitive)
        if case_sensitive is None:
            # TODO: evaluate case-sensitivity of each directory in _select_from()
            case_sensitive = _is_case_sensitive(flavour)
        self.match = _compile_pattern(pat, case_sensitive)

    def _select_from(self, parent_path, scandir):
        try:
            # Read every entry up front so the scandir() object is closed
            # before we go any deeper; this avoids exhausting file
            # descriptors when globbing deep trees.
            with scandir(parent_path) as scandir_it:
                entries = list(scandir_it)
        except OSError:
            # A directory that cannot be scanned contributes no results.
            return

        for entry in entries:
            if self.dironly:
                # Skip non-directories, and entries whose type cannot be
                # determined.
                try:
                    is_directory = entry.is_dir()
                except OSError:
                    is_directory = False
                if not is_directory:
                    continue
            entry_name = entry.name
            if not self.match(entry_name):
                continue
            child = parent_path._make_child_relpath(entry_name)
            yield from self.successor._select_from(child, scandir)
class _RecursiveWildcardSelector(_Selector):
    """Selector for a '**' pattern part: the successor is applied to the
    given path and to every directory found by walking beneath it."""

    def __init__(self, pat, child_parts, flavour, case_sensitive):
        # *pat* is not stored; only the remaining pattern parts are needed.
        super().__init__(child_parts, flavour, case_sensitive)

    def _iterate_directories(self, parent_path):
        """Yield `parent_path`, then a child path for each directory name
        reported by `parent_path.walk()`."""
        yield parent_path
        for current_dir, subdir_names, _files in parent_path.walk():
            for subdir_name in subdir_names:
                yield current_dir._make_child_relpath(subdir_name)

    def _select_from(self, parent_path, scandir):
        select_below = self.successor._select_from
        for start in self._iterate_directories(parent_path):
            yield from select_below(start, scandir)
class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
    """
    Variant of _RecursiveWildcardSelector that yields each result only once.
    De-duplication is needed when the pattern contains several '**' segments
    that are not adjacent to one another.
    """

    def _select_from(self, parent_path, scandir):
        seen = set()
        try:
            for candidate in super()._select_from(parent_path, scandir):
                if candidate in seen:
                    continue
                yield candidate
                seen.add(candidate)
        finally:
            # Release the remembered paths once iteration ends or the
            # generator is closed.
            seen.clear()