diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f4aab1c0ce..bd5a096f9e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1,3 +1,10 @@ +"""Object-oriented filesystem paths. + +This module provides classes to represent abstract paths and concrete +paths with operations that have semantics appropriate for different +operating systems. +""" + import fnmatch import functools import io @@ -8,8 +15,7 @@ import sys import warnings from _collections_abc import Sequence -from errno import EINVAL, ENOENT, ENOTDIR, EBADF, ELOOP -from operator import attrgetter +from errno import ENOENT, ENOTDIR, EBADF, ELOOP from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes @@ -23,12 +29,20 @@ # Internals # +# Reference for Windows paths can be found at +# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . +_WIN_RESERVED_NAMES = frozenset( + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | + {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} +) + _WINERROR_NOT_READY = 21 # drive exists but is not accessible _WINERROR_INVALID_NAME = 123 # fix for bpo-35306 _WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself # EBADF - guard against macOS `stat` throwing EBADF -_IGNORED_ERROS = (ENOENT, ENOTDIR, EBADF, ELOOP) +_IGNORED_ERRNOS = (ENOENT, ENOTDIR, EBADF, ELOOP) _IGNORED_WINERRORS = ( _WINERROR_NOT_READY, @@ -36,363 +50,112 @@ _WINERROR_CANT_RESOLVE_FILENAME) def _ignore_error(exception): - # XXX RUSTPYTHON: added check for FileNotFoundError, file.exists() on windows throws it - # but with a errno==ESRCH for some reason - return (isinstance(exception, FileNotFoundError) or - getattr(exception, 'errno', None) in _IGNORED_ERROS or + return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) -def _is_wildcard_pattern(pat): - # Whether this pattern needs actual matching using fnmatch, or can - # be looked up directly as a file. - return "*" in pat or "?" in pat or "[" in pat - +@functools.cache +def _is_case_sensitive(flavour): + return flavour.normcase('Aa') == 'Aa' -class _Flavour(object): - """A flavour implements a particular (platform-specific) set of path - semantics.""" +# +# Globbing helpers +# - def __init__(self): - self.join = self.sep.join - def parse_parts(self, parts): - parsed = [] - sep = self.sep - altsep = self.altsep - drv = root = '' - it = reversed(parts) - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv, root, rel = self.splitroot(part) - if sep in rel: - for x in reversed(rel.split(sep)): - if x and x != '.': - parsed.append(sys.intern(x)) - else: - if rel and rel != '.': - parsed.append(sys.intern(rel)) - if drv or root: - if not drv: - # If no drive is present, try to find one in the previous - # parts. This makes the result of parsing e.g. - # ("C:", "/", "a") reasonably intuitive. - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv = self.splitroot(part)[0] - if drv: - break - break - if drv or root: - parsed.append(drv + root) - parsed.reverse() - return drv, root, parsed +# fnmatch.translate() returns a regular expression that includes a prefix and +# a suffix, which enable matching newlines and ensure the end of the string is +# matched, respectively. These features are undesirable for our implementation +# of PurePatch.match(), which represents path separators as newlines and joins +# pattern segments together. As a workaround, we define a slice object that +# can remove the prefix and suffix from any translate() result. See the +# _compile_pattern_lines() function for more details. +_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_') +_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX)) +_SWAP_SEP_AND_NEWLINE = { + '/': str.maketrans({'/': '\n', '\n': '/'}), + '\\': str.maketrans({'\\': '\n', '\n': '\\'}), +} - def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): - """ - Join the two paths represented by the respective - (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. - """ - if root2: - if not drv2 and drv: - return drv, root2, [drv + root2] + parts2[1:] - elif drv2: - if drv2 == drv or self.casefold(drv2) == self.casefold(drv): - # Same drive => second path is relative to the first - return drv, root, parts + parts2[1:] - else: - # Second path is non-anchored (common case) - return drv, root, parts + parts2 - return drv2, root2, parts2 - - -class _WindowsFlavour(_Flavour): - # Reference for Windows paths can be found at - # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - - sep = '\\' - altsep = '/' - has_drv = True - pathmod = ntpath - - is_supported = (os.name == 'nt') - - drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') - ext_namespace_prefix = '\\\\?\\' - - reserved_names = ( - {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | - {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} | - {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'} - ) - - # Interesting findings about extended paths: - # * '\\?\c:\a' is an extended path, which bypasses normal Windows API - # path processing. Thus relative paths are not resolved and slash is not - # translated to backslash. It has the native NT path limit of 32767 - # characters, but a bit less after resolving device symbolic links, - # such as '\??\C:' => '\Device\HarddiskVolume2'. - # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a - # regular name character in the object namespace. - # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems. - # The only path separator at the filesystem level is backslash. - # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and - # thus limited to MAX_PATH. - # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH, - # even with the '\\?\' prefix. - - def splitroot(self, part, sep=sep): - first = part[0:1] - second = part[1:2] - if (second == sep and first == sep): - # XXX extended paths should also disable the collapsing of "." - # components (according to MSDN docs). - prefix, part = self._split_extended_path(part) - first = part[0:1] - second = part[1:2] - else: - prefix = '' - third = part[2:3] - if (second == sep and first == sep and third != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvvv root - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^ - index = part.find(sep, 2) - if index != -1: - index2 = part.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 != index + 1: - if index2 == -1: - index2 = len(part) - if prefix: - return prefix + part[1:index2], sep, part[index2+1:] - else: - return part[:index2], sep, part[index2+1:] - drv = root = '' - if second == ':' and first in self.drive_letters: - drv = part[:2] - part = part[2:] - first = third - if first == sep: - root = first - part = part.lstrip(sep) - return prefix + drv, root, part - - def casefold(self, s): - return s.lower() - - def casefold_parts(self, parts): - return [p.lower() for p in parts] - - def compile_pattern(self, pattern): - return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - - def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): - prefix = '' - if s.startswith(ext_prefix): - prefix = s[:4] - s = s[4:] - if s.startswith('UNC\\'): - prefix += s[:3] - s = '\\' + s[3:] - return prefix, s - - def is_reserved(self, parts): - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not - # exist). We err on the side of caution and return True for paths - # which are not considered reserved by Windows. - if not parts: - return False - if parts[0].startswith('\\\\'): - # UNC paths are never reserved - return False - name = parts[-1].partition('.')[0].partition(':')[0].rstrip(' ') - return name.upper() in self.reserved_names - def make_uri(self, path): - # Under Windows, file URIs use the UTF-8 encoding. - drive = path.drive - if len(drive) == 2 and drive[1] == ':': - # It's a path on a local drive => 'file:///c:/a/b' - rest = path.as_posix()[2:].lstrip('/') - return 'file:///%s/%s' % ( - drive, urlquote_from_bytes(rest.encode('utf-8'))) - else: - # It's a path on a network drive => 'file://host/share/a/b' - return 'file:' + urlquote_from_bytes(path.as_posix().encode('utf-8')) - - -class _PosixFlavour(_Flavour): - sep = '/' - altsep = '' - has_drv = False - pathmod = posixpath - - is_supported = (os.name != 'nt') - - def splitroot(self, part, sep=sep): - if part and part[0] == sep: - stripped_part = part.lstrip(sep) - # According to POSIX path resolution: - # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 - # "A pathname that begins with two successive slashes may be - # interpreted in an implementation-defined manner, although more - # than two leading slashes shall be treated as a single slash". - if len(part) - len(stripped_part) == 2: - return '', sep * 2, stripped_part - else: - return '', sep, stripped_part +@functools.lru_cache() +def _make_selector(pattern_parts, flavour, case_sensitive): + pat = pattern_parts[0] + if not pat: + return _TerminatingSelector() + if pat == '**': + child_parts_idx = 1 + while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == '**': + child_parts_idx += 1 + child_parts = pattern_parts[child_parts_idx:] + if '**' in child_parts: + cls = _DoubleRecursiveWildcardSelector else: - return '', '', part - - def casefold(self, s): - return s - - def casefold_parts(self, parts): - return parts - - def compile_pattern(self, pattern): - return re.compile(fnmatch.translate(pattern)).fullmatch - - def is_reserved(self, parts): - return False - - def make_uri(self, path): - # We represent the path using the local filesystem encoding, - # for portability to other applications. - bpath = bytes(path) - return 'file://' + urlquote_from_bytes(bpath) - - -_windows_flavour = _WindowsFlavour() -_posix_flavour = _PosixFlavour() - - -class _Accessor: - """An accessor implements a particular (system-specific or not) way of - accessing paths on the filesystem.""" - - -class _NormalAccessor(_Accessor): - - stat = os.stat - - open = io.open - - listdir = os.listdir - - scandir = os.scandir - - chmod = os.chmod - - mkdir = os.mkdir - - unlink = os.unlink - - if hasattr(os, "link"): - link = os.link - else: - def link(self, src, dst): - raise NotImplementedError("os.link() not available on this system") - - rmdir = os.rmdir - - rename = os.rename - - replace = os.replace - - if hasattr(os, "symlink"): - symlink = os.symlink - else: - def symlink(self, src, dst, target_is_directory=False): - raise NotImplementedError("os.symlink() not available on this system") - - def touch(self, path, mode=0o666, exist_ok=True): - if exist_ok: - # First try to bump modification time - # Implementation note: GNU touch uses the UTIME_NOW option of - # the utimensat() / futimens() functions. - try: - os.utime(path, None) - except OSError: - # Avoid exception chaining - pass - else: - return - flags = os.O_CREAT | os.O_WRONLY - if not exist_ok: - flags |= os.O_EXCL - fd = os.open(path, flags, mode) - os.close(fd) - - if hasattr(os, "readlink"): - readlink = os.readlink + cls = _RecursiveWildcardSelector else: - def readlink(self, path): - raise NotImplementedError("os.readlink() not available on this system") - - def owner(self, path): - try: - import pwd - return pwd.getpwuid(self.stat(path).st_uid).pw_name - except ImportError: - raise NotImplementedError("Path.owner() is unsupported on this system") - - def group(self, path): - try: - import grp - return grp.getgrgid(self.stat(path).st_gid).gr_name - except ImportError: - raise NotImplementedError("Path.group() is unsupported on this system") - - getcwd = os.getcwd - - expanduser = staticmethod(os.path.expanduser) + child_parts = pattern_parts[1:] + if pat == '..': + cls = _ParentSelector + elif '**' in pat: + raise ValueError("Invalid pattern: '**' can only be an entire path component") + else: + cls = _WildcardSelector + return cls(pat, child_parts, flavour, case_sensitive) - realpath = staticmethod(os.path.realpath) +@functools.lru_cache(maxsize=256) +def _compile_pattern(pat, case_sensitive): + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + return re.compile(fnmatch.translate(pat), flags).match -_normal_accessor = _NormalAccessor() +@functools.lru_cache() +def _compile_pattern_lines(pattern_lines, case_sensitive): + """Compile the given pattern lines to an `re.Pattern` object. -# -# Globbing helpers -# + The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with + its path separators and newlines swapped (e.g. '*\n*.py`). By using + newlines to separate path components, and not setting `re.DOTALL`, we + ensure that the `*` wildcard cannot match path separators. -def _make_selector(pattern_parts, flavour): - pat = pattern_parts[0] - child_parts = pattern_parts[1:] - if pat == '**': - cls = _RecursiveWildcardSelector - elif '**' in pat: - raise ValueError("Invalid pattern: '**' can only be an entire path component") - elif _is_wildcard_pattern(pat): - cls = _WildcardSelector - else: - cls = _PreciseSelector - return cls(pat, child_parts, flavour) + The returned `re.Pattern` object may have its `match()` method called to + match a complete pattern, or `search()` to match from the right. The + argument supplied to these methods must also have its path separators and + newlines swapped. + """ -if hasattr(functools, "lru_cache"): - _make_selector = functools.lru_cache()(_make_selector) + # Match the start of the path, or just after a path separator + parts = ['^'] + for part in pattern_lines.splitlines(keepends=True): + if part == '*\n': + part = r'.+\n' + elif part == '*': + part = r'.+' + else: + # Any other component: pass to fnmatch.translate(). We slice off + # the common prefix and suffix added by translate() to ensure that + # re.DOTALL is not set, and the end of the string not matched, + # respectively. With DOTALL not set, '*' wildcards will not match + # path separators, because the '.' characters in the pattern will + # not match newlines. + part = fnmatch.translate(part)[_FNMATCH_SLICE] + parts.append(part) + # Match the end of the path, always. + parts.append(r'\Z') + flags = re.MULTILINE + if not case_sensitive: + flags |= re.IGNORECASE + return re.compile(''.join(parts), flags=flags) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour): + def __init__(self, child_parts, flavour, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour) + self.successor = _make_selector(child_parts, flavour, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -402,105 +165,95 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) - is_dir = path_cls.is_dir - exists = path_cls.exists - scandir = parent_path._accessor.scandir - if not is_dir(parent_path): + scandir = path_cls._scandir + if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir) + return self._select_from(parent_path, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): yield parent_path -class _PreciseSelector(_Selector): +class _ParentSelector(_Selector): - def __init__(self, name, child_parts, flavour): - self.name = name - _Selector.__init__(self, child_parts, flavour) + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir): - try: - path = parent_path._make_child_relpath(self.name) - if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir): - yield p - except PermissionError: - return + def _select_from(self, parent_path, scandir): + path = parent_path._make_child_relpath('..') + for p in self.successor._select_from(path, scandir): + yield p class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - self.match = flavour.compile_pattern(pat) - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + if case_sensitive is None: + # TODO: evaluate case-sensitivity of each directory in _select_from() + case_sensitive = _is_case_sensitive(flavour) + self.match = _compile_pattern(pat, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. with scandir(parent_path) as scandir_it: entries = list(scandir_it) + except OSError: + pass + else: for entry in entries: if self.dironly: try: - # "entry.is_dir()" can raise PermissionError - # in some cases (see bpo-38894), which is not - # among the errors ignored by _ignore_error() if not entry.is_dir(): continue - except OSError as e: - if not _ignore_error(e): - raise + except OSError: continue name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p - except PermissionError: - return class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _iterate_directories(self, parent_path, is_dir, scandir): + def _iterate_directories(self, parent_path): yield parent_path - try: - with scandir(parent_path) as scandir_it: - entries = list(scandir_it) - for entry in entries: - entry_is_dir = False - try: - entry_is_dir = entry.is_dir() - except OSError as e: - if not _ignore_error(e): - raise - if entry_is_dir and not entry.is_symlink(): - path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): - yield p - except PermissionError: - return + for dirpath, dirnames, _ in parent_path.walk(): + for dirname in dirnames: + yield dirpath._make_child_relpath(dirname) + + def _select_from(self, parent_path, scandir): + successor_select = self.successor._select_from + for starting_point in self._iterate_directories(parent_path): + for p in successor_select(starting_point, scandir): + yield p + + +class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector): + """ + Like _RecursiveWildcardSelector, but also de-duplicates results from + successive selectors. This is necessary if the pattern contains + multiple non-adjacent '**' segments. + """ - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): + yielded = set() try: - yielded = set() - try: - successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir): - if p not in yielded: - yield p - yielded.add(p) - finally: - yielded.clear() - except PermissionError: - return + for p in super()._select_from(parent_path, scandir): + if p not in yielded: + yield p + yielded.add(p) + finally: + yielded.clear() # @@ -510,20 +263,16 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" - __slots__ = ('_pathcls', '_drv', '_root', '_parts') + __slots__ = ('_path', '_drv', '_root', '_tail') def __init__(self, path): - # We don't store the instance to avoid reference cycles - self._pathcls = type(path) - self._drv = path._drv - self._root = path._root - self._parts = path._parts + self._path = path + self._drv = path.drive + self._root = path.root + self._tail = path._tail def __len__(self): - if self._drv or self._root: - return len(self._parts) - 1 - else: - return len(self._parts) + return len(self._tail) def __getitem__(self, idx): if isinstance(idx, slice): @@ -533,11 +282,11 @@ def __getitem__(self, idx): raise IndexError(idx) if idx < 0: idx += len(self) - return self._pathcls._from_parsed_parts(self._drv, self._root, - self._parts[:-idx - 1]) + return self._path._from_parsed_parts(self._drv, self._root, + self._tail[:-idx - 1]) def __repr__(self): - return "<{}.parents>".format(self._pathcls.__name__) + return "<{}.parents>".format(type(self._path).__name__) class PurePath(object): @@ -549,12 +298,49 @@ class PurePath(object): PureWindowsPath object. You can also instantiate either of these classes directly, regardless of your system. """ + __slots__ = ( - '_drv', '_root', '_parts', - '_str', '_hash', '_pparts', '_cached_cparts', + # The `_raw_paths` slot stores unnormalized string paths. This is set + # in the `__init__()` method. + '_raw_paths', + + # The `_drv`, `_root` and `_tail_cached` slots store parsed and + # normalized parts of the path. They are set when any of the `drive`, + # `root` or `_tail` properties are accessed for the first time. The + # three-part division corresponds to the result of + # `os.path.splitroot()`, except that the tail is further split on path + # separators (i.e. it is a list of strings), and that the root and + # tail are normalized. + '_drv', '_root', '_tail_cached', + + # The `_str` slot stores the string representation of the path, + # computed from the drive, root and tail when `__str__()` is called + # for the first time. It's used to implement `_str_normcase` + '_str', + + # The `_str_normcase_cached` slot stores the string path with + # normalized case. It is set when the `_str_normcase` property is + # accessed for the first time. It's used to implement `__eq__()` + # `__hash__()`, and `_parts_normcase` + '_str_normcase_cached', + + # The `_parts_normcase_cached` slot stores the case-normalized + # string path after splitting on path separators. It's set when the + # `_parts_normcase` property is accessed for the first time. It's used + # to implement comparison methods like `__lt__()`. + '_parts_normcase_cached', + + # The `_lines_cached` slot stores the string path with path separators + # and newlines swapped. This is used to implement `match()`. + '_lines_cached', + + # The `_hash` slot stores the hash of the case-normalized string + # path. It's set when `__hash__()` is called for the first time. + '_hash', ) + _flavour = os.path - def __new__(cls, *args): + def __new__(cls, *args, **kwargs): """Construct a PurePath from one or several strings and or existing PurePath objects. The strings and path objects are combined so as to yield a canonicalized path, which is incorporated into the @@ -562,64 +348,91 @@ def __new__(cls, *args): """ if cls is PurePath: cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return cls._from_parts(args) + return object.__new__(cls) def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. - return (self.__class__, tuple(self._parts)) - - @classmethod - def _parse_args(cls, args): - # This is useful when you don't want to create an instance, just - # canonicalize some constructor arguments. - parts = [] - for a in args: - if isinstance(a, PurePath): - parts += a._parts - else: - a = os.fspath(a) - if isinstance(a, str): - # Force-cast str subclasses to str (issue #21127) - parts.append(str(a)) + return (self.__class__, self.parts) + + def __init__(self, *args): + paths = [] + for arg in args: + if isinstance(arg, PurePath): + if arg._flavour is ntpath and self._flavour is posixpath: + # GH-103631: Convert separators for backwards compatibility. + paths.extend(path.replace('\\', '/') for path in arg._raw_paths) else: + paths.extend(arg._raw_paths) + else: + try: + path = os.fspath(arg) + except TypeError: + path = arg + if not isinstance(path, str): raise TypeError( - "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(a)) - return cls._flavour.parse_parts(parts) + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") + paths.append(path) + self._raw_paths = paths - @classmethod - def _from_parts(cls, args): - # We need to call _parse_args on the instance, so as to get the - # right flavour. - self = object.__new__(cls) - drv, root, parts = self._parse_args(args) - self._drv = drv - self._root = root - self._parts = parts - return self + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. + """ + return type(self)(*pathsegments) @classmethod - def _from_parsed_parts(cls, drv, root, parts): - self = object.__new__(cls) + def _parse_path(cls, path): + if not path: + return '', '', [] + sep = cls._flavour.sep + altsep = cls._flavour.altsep + if altsep: + path = path.replace(altsep, sep) + drv, root, rel = cls._flavour.splitroot(path) + if not root and drv.startswith(sep) and not drv.endswith(sep): + drv_parts = drv.split(sep) + if len(drv_parts) == 4 and drv_parts[2] not in '?.': + # e.g. //server/share + root = sep + elif len(drv_parts) == 6: + # e.g. //?/unc/server/share + root = sep + parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] + return drv, root, parsed + + def _load_parts(self): + paths = self._raw_paths + if len(paths) == 0: + path = '' + elif len(paths) == 1: + path = paths[0] + else: + path = self._flavour.join(*paths) + drv, root, tail = self._parse_path(path) self._drv = drv self._root = root - self._parts = parts - return self + self._tail_cached = tail + + def _from_parsed_parts(self, drv, root, tail): + path_str = self._format_parsed_parts(drv, root, tail) + path = self.with_segments(path_str) + path._str = path_str or '.' + path._drv = drv + path._root = root + path._tail_cached = tail + return path @classmethod - def _format_parsed_parts(cls, drv, root, parts): + def _format_parsed_parts(cls, drv, root, tail): if drv or root: - return drv + root + cls._flavour.join(parts[1:]) - else: - return cls._flavour.join(parts) - - def _make_child(self, args): - drv, root, parts = self._parse_args(args) - drv, root, parts = self._flavour.join_parsed_parts( - self._drv, self._root, self._parts, drv, root, parts) - return self._from_parsed_parts(drv, root, parts) + return drv + root + cls._flavour.sep.join(tail) + elif tail and cls._flavour.splitdrive(tail[0])[0]: + tail = ['.'] + tail + return cls._flavour.sep.join(tail) def __str__(self): """Return the string representation of the path, suitable for @@ -627,8 +440,8 @@ def __str__(self): try: return self._str except AttributeError: - self._str = self._format_parsed_parts(self._drv, self._root, - self._parts) or '.' + self._str = self._format_parsed_parts(self.drive, self.root, + self._tail) or '.' return self._str def __fspath__(self): @@ -652,71 +465,128 @@ def as_uri(self): """Return the path as a 'file' URI.""" if not self.is_absolute(): raise ValueError("relative path can't be expressed as a file URI") - return self._flavour.make_uri(self) + + drive = self.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + prefix = 'file:///' + drive + path = self.as_posix()[2:] + elif drive: + # It's a path on a network drive => 'file://host/share/a/b' + prefix = 'file:' + path = self.as_posix() + else: + # It's a posix path => 'file:///etc/hosts' + prefix = 'file://' + path = str(self) + return prefix + urlquote_from_bytes(os.fsencode(path)) + + @property + def _str_normcase(self): + # String with normalized case, for hashing and equality checks + try: + return self._str_normcase_cached + except AttributeError: + if _is_case_sensitive(self._flavour): + self._str_normcase_cached = str(self) + else: + self._str_normcase_cached = str(self).lower() + return self._str_normcase_cached @property - def _cparts(self): - # Cached casefolded parts, for hashing and comparison + def _parts_normcase(self): + # Cached parts with normalized case, for comparisons. try: - return self._cached_cparts + return self._parts_normcase_cached except AttributeError: - self._cached_cparts = self._flavour.casefold_parts(self._parts) - return self._cached_cparts + self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep) + return self._parts_normcase_cached + + @property + def _lines(self): + # Path with separators and newlines swapped, for pattern matching. + try: + return self._lines_cached + except AttributeError: + path_str = str(self) + if path_str == '.': + self._lines_cached = '' + else: + trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep] + self._lines_cached = path_str.translate(trans) + return self._lines_cached def __eq__(self, other): if not isinstance(other, PurePath): return NotImplemented - return self._cparts == other._cparts and self._flavour is other._flavour + return self._str_normcase == other._str_normcase and self._flavour is other._flavour def __hash__(self): try: return self._hash except AttributeError: - self._hash = hash(tuple(self._cparts)) + self._hash = hash(self._str_normcase) return self._hash def __lt__(self, other): if not isinstance(other, PurePath) or self._flavour is not other._flavour: return NotImplemented - return self._cparts < other._cparts + return self._parts_normcase < other._parts_normcase def __le__(self, other): if not isinstance(other, PurePath) or self._flavour is not other._flavour: return NotImplemented - return self._cparts <= other._cparts + return self._parts_normcase <= other._parts_normcase def __gt__(self, other): if not isinstance(other, PurePath) or self._flavour is not other._flavour: return NotImplemented - return self._cparts > other._cparts + return self._parts_normcase > other._parts_normcase def __ge__(self, other): if not isinstance(other, PurePath) or self._flavour is not other._flavour: return NotImplemented - return self._cparts >= other._cparts + return self._parts_normcase >= other._parts_normcase - def __class_getitem__(cls, type): - return cls + @property + def drive(self): + """The drive prefix (letter or UNC path), if any.""" + try: + return self._drv + except AttributeError: + self._load_parts() + return self._drv - drive = property(attrgetter('_drv'), - doc="""The drive prefix (letter or UNC path), if any.""") + @property + def root(self): + """The root of the path, if any.""" + try: + return self._root + except AttributeError: + self._load_parts() + return self._root - root = property(attrgetter('_root'), - doc="""The root of the path, if any.""") + @property + def _tail(self): + try: + return self._tail_cached + except AttributeError: + self._load_parts() + return self._tail_cached @property def anchor(self): """The concatenation of the drive and root, or ''.""" - anchor = self._drv + self._root + anchor = self.drive + self.root return anchor @property def name(self): """The final path component, if any.""" - parts = self._parts - if len(parts) == (1 if (self._drv or self._root) else 0): + tail = self._tail + if not tail: return '' - return parts[-1] + return tail[-1] @property def suffix(self): @@ -759,12 +629,11 @@ def with_name(self, name): """Return a new path with the file name changed.""" if not self.name: raise ValueError("%r has an empty name" % (self,)) - drv, root, parts = self._flavour.parse_parts((name,)) - if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep] - or drv or root or len(parts) != 1): + f = self._flavour + if not name or f.sep in name or (f.altsep and f.altsep in name) or name == '.': raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self._drv, self._root, - self._parts[:-1] + [name]) + return self._from_parsed_parts(self.drive, self.root, + self._tail[:-1] + [name]) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -788,137 +657,144 @@ def with_suffix(self, suffix): name = name + suffix else: name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self._drv, self._root, - self._parts[:-1] + [name]) + return self._from_parsed_parts(self.drive, self.root, + self._tail[:-1] + [name]) - def relative_to(self, *other): + def relative_to(self, other, /, *_deprecated, walk_up=False): """Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not - a subpath of the other path), raise ValueError. - """ - # For the purpose of this method, drive and root are considered - # separate parts, i.e.: - # Path('c:/').relative_to('c:') gives Path('/') - # Path('c:/').relative_to('/') raise ValueError - if not other: - raise TypeError("need at least one argument") - parts = self._parts - drv = self._drv - root = self._root - if root: - abs_parts = [drv, root] + parts[1:] - else: - abs_parts = parts - to_drv, to_root, to_parts = self._parse_args(other) - if to_root: - to_abs_parts = [to_drv, to_root] + to_parts[1:] + related to the other path), raise ValueError. + + The *walk_up* parameter controls whether `..` may be used to resolve + the path. + """ + if _deprecated: + msg = ("support for supplying more than one positional argument " + "to pathlib.PurePath.relative_to() is deprecated and " + "scheduled for removal in Python {remove}") + warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg, + remove=(3, 14)) + other = self.with_segments(other, *_deprecated) + for step, path in enumerate([other] + list(other.parents)): + if self.is_relative_to(path): + break + elif not walk_up: + raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") + elif path.name == '..': + raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") else: - to_abs_parts = to_parts - n = len(to_abs_parts) - cf = self._flavour.casefold_parts - if (root or drv) if n == 0 else cf(abs_parts[:n]) != cf(to_abs_parts): - formatted = self._format_parsed_parts(to_drv, to_root, to_parts) - raise ValueError("{!r} is not in the subpath of {!r}" - " OR one path is relative and the other is absolute." - .format(str(self), str(formatted))) - return self._from_parsed_parts('', root if n == 1 else '', - abs_parts[n:]) - - def is_relative_to(self, *other): + raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") + parts = ['..'] * step + self._tail[len(path._tail):] + return self.with_segments(*parts) + + def is_relative_to(self, other, /, *_deprecated): """Return True if the path is relative to another path or False. """ - try: - self.relative_to(*other) - return True - except ValueError: - return False + if _deprecated: + msg = ("support for supplying more than one argument to " + "pathlib.PurePath.is_relative_to() is deprecated and " + "scheduled for removal in Python {remove}") + warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", + msg, remove=(3, 14)) + other = self.with_segments(other, *_deprecated) + return other == self or other in self.parents @property def parts(self): """An object providing sequence-like access to the components in the filesystem path.""" - # We cache the tuple to avoid building a new one each time .parts - # is accessed. XXX is this necessary? - try: - return self._pparts - except AttributeError: - self._pparts = tuple(self._parts) - return self._pparts + if self.drive or self.root: + return (self.drive + self.root,) + tuple(self._tail) + else: + return tuple(self._tail) - def joinpath(self, *args): + def joinpath(self, *pathsegments): """Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). """ - return self._make_child(args) + return self.with_segments(self, *pathsegments) def __truediv__(self, key): try: - return self._make_child((key,)) + return self.joinpath(key) except TypeError: return NotImplemented def __rtruediv__(self, key): try: - return self._from_parts([key] + self._parts) + return self.with_segments(key, self) except TypeError: return NotImplemented @property def parent(self): """The logical parent of the path.""" - drv = self._drv - root = self._root - parts = self._parts - if len(parts) == 1 and (drv or root): + drv = self.drive + root = self.root + tail = self._tail + if not tail: return self - return self._from_parsed_parts(drv, root, parts[:-1]) + return self._from_parsed_parts(drv, root, tail[:-1]) @property def parents(self): """A sequence of this path's logical parents.""" + # The value of this property should not be cached on the path object, + # as doing so would introduce a reference cycle. return _PathParents(self) def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - if not self._root: + if self._flavour is ntpath: + # ntpath.isabs() is defective - see GH-44626. + return bool(self.drive and self.root) + elif self._flavour is posixpath: + # Optimization: work with raw paths on POSIX. + for path in self._raw_paths: + if path.startswith('/'): + return True return False - return not self._flavour.has_drv or bool(self._drv) + else: + return self._flavour.isabs(str(self)) def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - return self._flavour.is_reserved(self._parts) + if self._flavour is posixpath or not self._tail: + return False + + # NOTE: the rules for reserved names seem somewhat complicated + # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not + # exist). We err on the side of caution and return True for paths + # which are not considered reserved by Windows. + if self.drive.startswith('\\\\'): + # UNC paths are never reserved. + return False + name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ') + return name.upper() in _WIN_RESERVED_NAMES - def match(self, path_pattern): + def match(self, path_pattern, *, case_sensitive=None): """ Return True if this path matches the given pattern. """ - cf = self._flavour.casefold - path_pattern = cf(path_pattern) - drv, root, pat_parts = self._flavour.parse_parts((path_pattern,)) - if not pat_parts: + if not isinstance(path_pattern, PurePath): + path_pattern = self.with_segments(path_pattern) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive) + if path_pattern.drive or path_pattern.root: + return pattern.match(self._lines) is not None + elif path_pattern._tail: + return pattern.search(self._lines) is not None + else: raise ValueError("empty pattern") - if drv and drv != cf(self._drv): - return False - if root and root != cf(self._root): - return False - parts = self._cparts - if drv or root: - if len(pat_parts) != len(parts): - return False - pat_parts = pat_parts[1:] - elif len(pat_parts) > len(parts): - return False - for part, pat in zip(reversed(parts), reversed(pat_parts)): - if not fnmatch.fnmatchcase(part, pat): - return False - return True + # Can't subclass os.PathLike from PurePath and keep the constructor -# optimizations in PurePath._parse_args(). +# optimizations in PurePath.__slots__. os.PathLike.register(PurePath) @@ -928,7 +804,7 @@ class PurePosixPath(PurePath): On a POSIX system, instantiating a PurePath should return this object. However, you can also instantiate it directly on any system. """ - _flavour = _posix_flavour + _flavour = posixpath __slots__ = () @@ -938,7 +814,7 @@ class PureWindowsPath(PurePath): On a Windows system, instantiating a PurePath should return this object. However, you can also instantiate it directly on any system. """ - _flavour = _windows_flavour + _flavour = ntpath __slots__ = () @@ -954,162 +830,177 @@ class Path(PurePath): object. You can also instantiate a PosixPath or WindowsPath directly, but cannot instantiate a WindowsPath on a POSIX system or vice versa. """ - _accessor = _normal_accessor __slots__ = () - def __new__(cls, *args, **kwargs): - if cls is Path: - cls = WindowsPath if os.name == 'nt' else PosixPath - self = cls._from_parts(args) - if not self._flavour.is_supported: - raise NotImplementedError("cannot instantiate %r on your system" - % (cls.__name__,)) - return self - - def _make_child_relpath(self, part): - # This is an optimization used for dir walking. `part` must be - # a single part relative to this path. - parts = self._parts + [part] - return self._from_parsed_parts(self._drv, self._root, parts) + def stat(self, *, follow_symlinks=True): + """ + Return the result of the stat() system call on this path, like + os.stat() does. + """ + return os.stat(self, follow_symlinks=follow_symlinks) - def __enter__(self): - return self + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + return self.stat(follow_symlinks=False) - def __exit__(self, t, v, tb): - # https://bugs.python.org/issue39682 - # In previous versions of pathlib, this method marked this path as - # closed; subsequent attempts to perform I/O would raise an IOError. - # This functionality was never documented, and had the effect of - # making Path objects mutable, contrary to PEP 428. In Python 3.9 the - # _closed attribute was removed, and this method made a no-op. - # This method and __enter__()/__exit__() should be deprecated and - # removed in the future. - pass - # Public API + # Convenience functions for querying the stat results - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory - (as returned by os.getcwd()). + def exists(self, *, follow_symlinks=True): """ - return cls(cls._accessor.getcwd()) + Whether this path exists. - @classmethod - def home(cls): - """Return a new path pointing to the user's home directory (as - returned by os.path.expanduser('~')). + This method normally follows symlinks; to check whether a symlink exists, + add the argument follow_symlinks=False. """ - return cls("~").expanduser() + try: + self.stat(follow_symlinks=follow_symlinks) + except OSError as e: + if not _ignore_error(e): + raise + return False + except ValueError: + # Non-encodable path + return False + return True - def samefile(self, other_path): - """Return whether other_path is the same or not as this file - (as returned by os.path.samefile()). + def is_dir(self): + """ + Whether this path is a directory. """ - st = self.stat() try: - other_st = other_path.stat() - except AttributeError: - other_st = self._accessor.stat(other_path) - return os.path.samestat(st, other_st) + return S_ISDIR(self.stat().st_mode) + except OSError as e: + if not _ignore_error(e): + raise + # Path doesn't exist or is a broken symlink + # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) + return False + except ValueError: + # Non-encodable path + return False - def iterdir(self): - """Iterate over the files in this directory. Does not yield any - result for the special paths '.' and '..'. + def is_file(self): """ - for name in self._accessor.listdir(self): - if name in {'.', '..'}: - # Yielding a path object for these makes little sense - continue - yield self._make_child_relpath(name) - - def glob(self, pattern): - """Iterate over this subtree and yield all existing files (of any - kind, including directories) matching the given relative pattern. + Whether this path is a regular file (also True for symlinks pointing + to regular files). """ - sys.audit("pathlib.Path.glob", self, pattern) - if not pattern: - raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) - if drv or root: - raise NotImplementedError("Non-relative patterns are unsupported") - selector = _make_selector(tuple(pattern_parts), self._flavour) - for p in selector.select_from(self): - yield p + try: + return S_ISREG(self.stat().st_mode) + except OSError as e: + if not _ignore_error(e): + raise + # Path doesn't exist or is a broken symlink + # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) + return False + except ValueError: + # Non-encodable path + return False - def rglob(self, pattern): - """Recursively yield all existing files (of any kind, including - directories) matching the given relative pattern, anywhere in - this subtree. + def is_mount(self): """ - sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) - if drv or root: - raise NotImplementedError("Non-relative patterns are unsupported") - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) - for p in selector.select_from(self): - yield p - - def absolute(self): - """Return an absolute version of this path. This function works - even if the path doesn't point to anything. - - No normalization is done, i.e. all '.' and '..' will be kept along. - Use resolve() to get the canonical path to a file. + Check if this path is a mount point """ - # XXX untested yet! - if self.is_absolute(): - return self - # FIXME this must defer to the specific flavour (and, under Windows, - # use nt._getfullpathname()) - return self._from_parts([self._accessor.getcwd()] + self._parts) + return self._flavour.ismount(self) - def resolve(self, strict=False): + def is_symlink(self): """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it (for example turning slashes into backslashes under - Windows). + Whether this path is a symbolic link. """ + try: + return S_ISLNK(self.lstat().st_mode) + except OSError as e: + if not _ignore_error(e): + raise + # Path doesn't exist + return False + except ValueError: + # Non-encodable path + return False - def check_eloop(e): - winerror = getattr(e, 'winerror', 0) - if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME: - raise RuntimeError("Symlink loop from %r" % e.filename) + def is_junction(self): + """ + Whether this path is a junction. + """ + return self._flavour.isjunction(self) + def is_block_device(self): + """ + Whether this path is a block device. + """ try: - s = self._accessor.realpath(self, strict=strict) + return S_ISBLK(self.stat().st_mode) except OSError as e: - check_eloop(e) - raise - p = self._from_parts((s,)) - - # In non-strict mode, realpath() doesn't raise on symlink loops. - # Ensure we get an exception by calling stat() - if not strict: - try: - p.stat() - except OSError as e: - check_eloop(e) - return p + if not _ignore_error(e): + raise + # Path doesn't exist or is a broken symlink + # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) + return False + except ValueError: + # Non-encodable path + return False - def stat(self, *, follow_symlinks=True): + def is_char_device(self): """ - Return the result of the stat() system call on this path, like - os.stat() does. + Whether this path is a character device. """ - return self._accessor.stat(self, follow_symlinks=follow_symlinks) + try: + return S_ISCHR(self.stat().st_mode) + except OSError as e: + if not _ignore_error(e): + raise + # Path doesn't exist or is a broken symlink + # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) + return False + except ValueError: + # Non-encodable path + return False - def owner(self): + def is_fifo(self): """ - Return the login name of the file owner. + Whether this path is a FIFO. """ - return self._accessor.owner(self) + try: + return S_ISFIFO(self.stat().st_mode) + except OSError as e: + if not _ignore_error(e): + raise + # Path doesn't exist or is a broken symlink + # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) + return False + except ValueError: + # Non-encodable path + return False - def group(self): + def is_socket(self): """ - Return the group name of the file gid. + Whether this path is a socket. + """ + try: + return S_ISSOCK(self.stat().st_mode) + except OSError as e: + if not _ignore_error(e): + raise + # Path doesn't exist or is a broken symlink + # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) + return False + except ValueError: + # Non-encodable path + return False + + def samefile(self, other_path): + """Return whether other_path is the same or not as this file + (as returned by os.path.samefile()). """ - return self._accessor.group(self) + st = self.stat() + try: + other_st = other_path.stat() + except AttributeError: + other_st = self.with_segments(other_path).stat() + return self._flavour.samestat(st, other_st) def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): @@ -1119,8 +1010,7 @@ def open(self, mode='r', buffering=-1, encoding=None, """ if "b" not in mode: encoding = io.text_encoding(encoding) - return self._accessor.open(self, mode, buffering, encoding, errors, - newline) + return io.open(self, mode, buffering, encoding, errors, newline) def read_bytes(self): """ @@ -1157,25 +1047,268 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + for name in os.listdir(self): + yield self._make_child_relpath(name) + + def _scandir(self): + # bpo-24132: a future version of pathlib will support subclassing of + # pathlib.Path to customize how the filesystem is accessed. This + # includes scandir(), which is used to implement glob(). + return os.scandir(self) + + def _make_child_relpath(self, name): + path_str = str(self) + tail = self._tail + if tail: + path_str = f'{path_str}{self._flavour.sep}{name}' + elif path_str != '.': + path_str = f'{path_str}{name}' + else: + path_str = name + path = self.with_segments(path_str) + path._str = path_str + path._drv = self.drive + path._root = self.root + path._tail_cached = tail + [name] + return path + + def glob(self, pattern, *, case_sensitive=None): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given relative pattern. + """ + sys.audit("pathlib.Path.glob", self, pattern) + if not pattern: + raise ValueError("Unacceptable pattern: {!r}".format(pattern)) + drv, root, pattern_parts = self._parse_path(pattern) + if drv or root: + raise NotImplementedError("Non-relative patterns are unsupported") + if pattern[-1] in (self._flavour.sep, self._flavour.altsep): + pattern_parts.append('') + selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) + for p in selector.select_from(self): + yield p + + def rglob(self, pattern, *, case_sensitive=None): + """Recursively yield all existing files (of any kind, including + directories) matching the given relative pattern, anywhere in + this subtree. + """ + sys.audit("pathlib.Path.rglob", self, pattern) + drv, root, pattern_parts = self._parse_path(pattern) + if drv or root: + raise NotImplementedError("Non-relative patterns are unsupported") + if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): + pattern_parts.append('') + selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) + for p in selector.select_from(self): + yield p + + def walk(self, top_down=True, on_error=None, follow_symlinks=False): + """Walk the directory tree from this directory, similar to os.walk().""" + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) + paths = [self] + + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + + # We may not have read permission for self, in which case we can't + # get a list of the files the directory contains. os.walk() + # always suppressed the exception in that instance, rather than + # blow up for a minor reason when (say) a thousand readable + # directories are still left to visit. That logic is copied here. + try: + scandir_it = path._scandir() + except OSError as error: + if on_error is not None: + on_error(error) + continue + + with scandir_it: + dirnames = [] + filenames = [] + for entry in scandir_it: + try: + is_dir = entry.is_dir(follow_symlinks=follow_symlinks) + except OSError: + # Carried over from os.path.isdir(). + is_dir = False + + if is_dir: + dirnames.append(entry.name) + else: + filenames.append(entry.name) + + if top_down: + yield path, dirnames, filenames + else: + paths.append((path, dirnames, filenames)) + + paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + + def __init__(self, *args, **kwargs): + if kwargs: + msg = ("support for supplying keyword arguments to pathlib.PurePath " + "is deprecated and scheduled for removal in Python {remove}") + warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) + super().__init__(*args) + + def __new__(cls, *args, **kwargs): + if cls is Path: + cls = WindowsPath if os.name == 'nt' else PosixPath + return object.__new__(cls) + + def __enter__(self): + # In previous versions of pathlib, __exit__() marked this path as + # closed; subsequent attempts to perform I/O would raise an IOError. + # This functionality was never documented, and had the effect of + # making Path objects mutable, contrary to PEP 428. + # In Python 3.9 __exit__() was made a no-op. + # In Python 3.11 __enter__() began emitting DeprecationWarning. + # In Python 3.13 __enter__() and __exit__() should be removed. + warnings.warn("pathlib.Path.__enter__() is deprecated and scheduled " + "for removal in Python 3.13; Path objects as a context " + "manager is a no-op", + DeprecationWarning, stacklevel=2) + return self + + def __exit__(self, t, v, tb): + pass + + # Public API + + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + # We call 'absolute()' rather than using 'os.getcwd()' directly to + # enable users to replace the implementation of 'absolute()' in a + # subclass and benefit from the new behaviour here. This works because + # os.path.abspath('.') == os.getcwd(). + return cls().absolute() + + @classmethod + def home(cls): + """Return a new path pointing to the user's home directory (as + returned by os.path.expanduser('~')). + """ + return cls("~").expanduser() + + def absolute(self): + """Return an absolute version of this path by prepending the current + working directory. No normalization or symlink resolution is performed. + + Use resolve() to get the canonical path to a file. + """ + if self.is_absolute(): + return self + elif self.drive: + # There is a CWD on each drive-letter drive. + cwd = self._flavour.abspath(self.drive) + else: + cwd = os.getcwd() + # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). + # We pass only one argument to with_segments() to avoid the cost + # of joining, and we exploit the fact that getcwd() returns a + # fully-normalized string by storing it in _str. This is used to + # implement Path.cwd(). + if not self.root and not self._tail: + result = self.with_segments(cwd) + result._str = cwd + return result + return self.with_segments(cwd, self) + + def resolve(self, strict=False): + """ + Make the path absolute, resolving all symlinks on the way and also + normalizing it. + """ + + def check_eloop(e): + winerror = getattr(e, 'winerror', 0) + if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME: + raise RuntimeError("Symlink loop from %r" % e.filename) + + try: + s = self._flavour.realpath(self, strict=strict) + except OSError as e: + check_eloop(e) + raise + p = self.with_segments(s) + + # In non-strict mode, realpath() doesn't raise on symlink loops. + # Ensure we get an exception by calling stat() + if not strict: + try: + p.stat() + except OSError as e: + check_eloop(e) + return p + + def owner(self): + """ + Return the login name of the file owner. + """ + try: + import pwd + return pwd.getpwuid(self.stat().st_uid).pw_name + except ImportError: + raise NotImplementedError("Path.owner() is unsupported on this system") + + def group(self): + """ + Return the group name of the file gid. + """ + + try: + import grp + return grp.getgrgid(self.stat().st_gid).gr_name + except ImportError: + raise NotImplementedError("Path.group() is unsupported on this system") + def readlink(self): """ Return the path to which the symbolic link points. """ - path = self._accessor.readlink(self) - return self._from_parts((path,)) + if not hasattr(os, "readlink"): + raise NotImplementedError("os.readlink() not available on this system") + return self.with_segments(os.readlink(self)) def touch(self, mode=0o666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. """ - self._accessor.touch(self, mode, exist_ok) + + if exist_ok: + # First try to bump modification time + # Implementation note: GNU touch uses the UTIME_NOW option of + # the utimensat() / futimens() functions. + try: + os.utime(self, None) + except OSError: + # Avoid exception chaining + pass + else: + return + flags = os.O_CREAT | os.O_WRONLY + if not exist_ok: + flags |= os.O_EXCL + fd = os.open(self, flags, mode) + os.close(fd) def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ try: - self._accessor.mkdir(self, mode) + os.mkdir(self, mode) except FileNotFoundError: if not parents or self.parent == self: raise @@ -1191,7 +1324,7 @@ def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). """ - self._accessor.chmod(self, mode, follow_symlinks=follow_symlinks) + os.chmod(self, mode, follow_symlinks=follow_symlinks) def lchmod(self, mode): """ @@ -1206,7 +1339,7 @@ def unlink(self, missing_ok=False): If the path is a directory, use rmdir() instead. """ try: - self._accessor.unlink(self) + os.unlink(self) except FileNotFoundError: if not missing_ok: raise @@ -1215,14 +1348,7 @@ def rmdir(self): """ Remove this directory. The directory must be empty. """ - self._accessor.rmdir(self) - - def lstat(self): - """ - Like stat(), except if the path points to a symlink, the symlink's - status information is returned, rather than its target's. - """ - return self.stat(follow_symlinks=False) + os.rmdir(self) def rename(self, target): """ @@ -1234,8 +1360,8 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ - self._accessor.rename(self, target) - return self.__class__(target) + os.rename(self, target) + return self.with_segments(target) def replace(self, target): """ @@ -1247,15 +1373,17 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ - self._accessor.replace(self, target) - return self.__class__(target) + os.replace(self, target) + return self.with_segments(target) def symlink_to(self, target, target_is_directory=False): """ Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - self._accessor.symlink(target, self, target_is_directory) + if not hasattr(os, "symlink"): + raise NotImplementedError("os.symlink() not available on this system") + os.symlink(target, self, target_is_directory) def hardlink_to(self, target): """ @@ -1263,185 +1391,21 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - self._accessor.link(target, self) - - def link_to(self, target): - """ - Make the target path a hard link pointing to this path. - - Note this function does not make this path a hard link to *target*, - despite the implication of the function and argument names. The order - of arguments (target, link) is the reverse of Path.symlink_to, but - matches that of os.link. - - Deprecated since Python 3.10 and scheduled for removal in Python 3.12. - Use `hardlink_to()` instead. - """ - warnings.warn("pathlib.Path.link_to() is deprecated and is scheduled " - "for removal in Python 3.12. " - "Use pathlib.Path.hardlink_to() instead.", - DeprecationWarning, stacklevel=2) - self._accessor.link(self, target) - - # Convenience functions for querying the stat results - - def exists(self): - """ - Whether this path exists. - """ - try: - self.stat() - except OSError as e: - if not _ignore_error(e): - raise - return False - except ValueError: - # Non-encodable path - return False - return True - - def is_dir(self): - """ - Whether this path is a directory. - """ - try: - return S_ISDIR(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_file(self): - """ - Whether this path is a regular file (also True for symlinks pointing - to regular files). - """ - try: - return S_ISREG(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_mount(self): - """ - Check if this path is a POSIX mount point - """ - # Need to exist and be a dir - if not self.exists() or not self.is_dir(): - return False - - try: - parent_dev = self.parent.stat().st_dev - except OSError: - return False - - dev = self.stat().st_dev - if dev != parent_dev: - return True - ino = self.stat().st_ino - parent_ino = self.parent.stat().st_ino - return ino == parent_ino - - def is_symlink(self): - """ - Whether this path is a symbolic link. - """ - try: - return S_ISLNK(self.lstat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist - return False - except ValueError: - # Non-encodable path - return False - - def is_block_device(self): - """ - Whether this path is a block device. - """ - try: - return S_ISBLK(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_char_device(self): - """ - Whether this path is a character device. - """ - try: - return S_ISCHR(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_fifo(self): - """ - Whether this path is a FIFO. - """ - try: - return S_ISFIFO(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_socket(self): - """ - Whether this path is a socket. - """ - try: - return S_ISSOCK(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False + if not hasattr(os, "link"): + raise NotImplementedError("os.link() not available on this system") + os.link(target, self) def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - if (not (self._drv or self._root) and - self._parts and self._parts[0][:1] == '~'): - homedir = self._accessor.expanduser(self._parts[0]) + if (not (self.drive or self.root) and + self._tail and self._tail[0][:1] == '~'): + homedir = self._flavour.expanduser(self._tail[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - return self._from_parts([homedir] + self._parts[1:]) + drv, root, tail = self._parse_path(homedir) + return self._from_parsed_parts(drv, root, tail + self._tail[1:]) return self @@ -1453,6 +1417,11 @@ class PosixPath(Path, PurePosixPath): """ __slots__ = () + if os.name == 'nt': + def __new__(cls, *args, **kwargs): + raise NotImplementedError( + f"cannot instantiate {cls.__name__!r} on your system") + class WindowsPath(Path, PureWindowsPath): """Path subclass for Windows systems. @@ -1460,5 +1429,7 @@ class WindowsPath(Path, PureWindowsPath): """ __slots__ = () - def is_mount(self): - raise NotImplementedError("Path.is_mount() is unsupported on this system") + if os.name != 'nt': + def __new__(cls, *args, **kwargs): + raise NotImplementedError( + f"cannot instantiate {cls.__name__!r} on your system") diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index bf3fc5fb24..c1696bb373 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1,3 +1,4 @@ +import contextlib import collections.abc import io import os @@ -12,6 +13,8 @@ from unittest import mock from test.support import import_helper +from test.support import set_recursion_limit +from test.support import is_emscripten, is_wasi from test.support import os_helper from test.support.os_helper import TESTFN, FakePath @@ -21,149 +24,19 @@ grp = pwd = None -class _BaseFlavourTest(object): - - def _check_parse_parts(self, arg, expected): - f = self.flavour.parse_parts - sep = self.flavour.sep - altsep = self.flavour.altsep - actual = f([x.replace('/', sep) for x in arg]) - self.assertEqual(actual, expected) - if altsep: - actual = f([x.replace('/', altsep) for x in arg]) - self.assertEqual(actual, expected) - - def test_parse_parts_common(self): - check = self._check_parse_parts - sep = self.flavour.sep - # Unanchored parts. - check([], ('', '', [])) - check(['a'], ('', '', ['a'])) - check(['a/'], ('', '', ['a'])) - check(['a', 'b'], ('', '', ['a', 'b'])) - # Expansion. - check(['a/b'], ('', '', ['a', 'b'])) - check(['a/b/'], ('', '', ['a', 'b'])) - check(['a', 'b/c', 'd'], ('', '', ['a', 'b', 'c', 'd'])) - # Collapsing and stripping excess slashes. - check(['a', 'b//c', 'd'], ('', '', ['a', 'b', 'c', 'd'])) - check(['a', 'b/c/', 'd'], ('', '', ['a', 'b', 'c', 'd'])) - # Eliminating standalone dots. - check(['.'], ('', '', [])) - check(['.', '.', 'b'], ('', '', ['b'])) - check(['a', '.', 'b'], ('', '', ['a', 'b'])) - check(['a', '.', '.'], ('', '', ['a'])) - # The first part is anchored. - check(['/a/b'], ('', sep, [sep, 'a', 'b'])) - check(['/a', 'b'], ('', sep, [sep, 'a', 'b'])) - check(['/a/', 'b'], ('', sep, [sep, 'a', 'b'])) - # Ignoring parts before an anchored part. - check(['a', '/b', 'c'], ('', sep, [sep, 'b', 'c'])) - check(['a', '/b', '/c'], ('', sep, [sep, 'c'])) - - -class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase): - flavour = pathlib._posix_flavour - - def test_parse_parts(self): - check = self._check_parse_parts - # Collapsing of excess leading slashes, except for the double-slash - # special case. - check(['//a', 'b'], ('', '//', ['//', 'a', 'b'])) - check(['///a', 'b'], ('', '/', ['/', 'a', 'b'])) - check(['////a', 'b'], ('', '/', ['/', 'a', 'b'])) - # Paths which look like NT paths aren't treated specially. - check(['c:a'], ('', '', ['c:a'])) - check(['c:\\a'], ('', '', ['c:\\a'])) - check(['\\a'], ('', '', ['\\a'])) - - def test_splitroot(self): - f = self.flavour.splitroot - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a/b'), ('', '', 'a/b')) - self.assertEqual(f('a/b/'), ('', '', 'a/b/')) - self.assertEqual(f('/a'), ('', '/', 'a')) - self.assertEqual(f('/a/b'), ('', '/', 'a/b')) - self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) - # The root is collapsed when there are redundant slashes - # except when there are exactly two leading slashes, which - # is a special case in POSIX. - self.assertEqual(f('//a'), ('', '//', 'a')) - self.assertEqual(f('///a'), ('', '/', 'a')) - self.assertEqual(f('///a/b'), ('', '/', 'a/b')) - # Paths which look like NT paths aren't treated specially. - self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) - self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) - self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) - - -class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): - flavour = pathlib._windows_flavour - - def test_parse_parts(self): - check = self._check_parse_parts - # First part is anchored. - check(['c:'], ('c:', '', ['c:'])) - check(['c:/'], ('c:', '\\', ['c:\\'])) - check(['/'], ('', '\\', ['\\'])) - check(['c:a'], ('c:', '', ['c:', 'a'])) - check(['c:/a'], ('c:', '\\', ['c:\\', 'a'])) - check(['/a'], ('', '\\', ['\\', 'a'])) - # UNC paths. - check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) - check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) - check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) - # Second part is anchored, so that the first part is ignored. - check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) - check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) - # UNC paths. - check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) - # Collapsing and stripping excess slashes. - check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd'])) - # UNC paths. - check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) - # Extended paths. - check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\'])) - check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a'])) - check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b'])) - # Extended UNC paths (format is "\\?\UNC\server\share"). - check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\'])) - check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd'])) - # Second part has a root but not drive. - check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) - check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) - check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) - - def test_splitroot(self): - f = self.flavour.splitroot - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a\\b'), ('', '', 'a\\b')) - self.assertEqual(f('\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) - self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) - # Redundant slashes in the root are collapsed. - self.assertEqual(f('\\\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\\\\\a/b'), ('', '\\', 'a/b')) - self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) - self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) - # Valid UNC paths. - self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) - # These are non-UNC paths (according to ntpath.py and test_ntpath). - # However, command.com says such paths are invalid, so it's - # difficult to know what the right semantics are. - self.assertEqual(f('\\\\\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('\\\\a'), ('', '\\', 'a')) - - # # Tests for the pure classes. # +class _BasePurePathSubclass(object): + def __init__(self, *pathsegments, session_id): + super().__init__(*pathsegments) + self.session_id = session_id + + def with_segments(self, *pathsegments): + return type(self)(*pathsegments, session_id=self.session_id) + + class _BasePurePathTest(object): # Keys are canonical paths, values are list of tuples of arguments @@ -176,8 +49,7 @@ class _BasePurePathTest(object): ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), ], '/b/c/d': [ - ('a', '/b/c', 'd'), ('a', '///b//c', 'd/'), - ('/a', '/b/c', 'd'), + ('a', '/b/c', 'd'), ('/a', '/b/c', 'd'), # Empty components get removed. ('/', 'b', '', 'c/d'), ('/', '', 'b/c/d'), ('', '/b/c/d'), ], @@ -202,6 +74,34 @@ def test_constructor_common(self): self.assertEqual(P(P('a'), 'b'), P('a/b')) self.assertEqual(P(P('a'), P('b')), P('a/b')) self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) + self.assertEqual(P(P('./a:b')), P('./a:b')) + + def test_bytes(self): + P = self.cls + message = (r"argument should be a str or an os\.PathLike object " + r"where __fspath__ returns a str, not 'bytes'") + with self.assertRaisesRegex(TypeError, message): + P(b'a') + with self.assertRaisesRegex(TypeError, message): + P(b'a', 'b') + with self.assertRaisesRegex(TypeError, message): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object @@ -222,6 +122,63 @@ def test_str_subclass_common(self): self._check_str_subclass('a/b.txt') self._check_str_subclass('/a/b.txt') + @unittest.skip("TODO: RUSTPYTHON; PyObject::set_slot index out of bounds") + def test_with_segments_common(self): + class P(_BasePurePathSubclass, self.cls): + pass + p = P('foo', 'bar', session_id=42) + self.assertEqual(42, (p / 'foo').session_id) + self.assertEqual(42, ('foo' / p).session_id) + self.assertEqual(42, p.joinpath('foo').session_id) + self.assertEqual(42, p.with_name('foo').session_id) + self.assertEqual(42, p.with_stem('foo').session_id) + self.assertEqual(42, p.with_suffix('.foo').session_id) + self.assertEqual(42, p.with_segments('foo').session_id) + self.assertEqual(42, p.relative_to('foo').session_id) + self.assertEqual(42, p.parent.session_id) + for parent in p.parents: + self.assertEqual(42, parent.session_id) + + def _get_drive_root_parts(self, parts): + path = self.cls(*parts) + return path.drive, path.root, path.parts + + def _check_drive_root_parts(self, arg, *expected): + sep = self.flavour.sep + actual = self._get_drive_root_parts([x.replace('/', sep) for x in arg]) + self.assertEqual(actual, expected) + if altsep := self.flavour.altsep: + actual = self._get_drive_root_parts([x.replace('/', altsep) for x in arg]) + self.assertEqual(actual, expected) + + def test_drive_root_parts_common(self): + check = self._check_drive_root_parts + sep = self.flavour.sep + # Unanchored parts. + check((), '', '', ()) + check(('a',), '', '', ('a',)) + check(('a/',), '', '', ('a',)) + check(('a', 'b'), '', '', ('a', 'b')) + # Expansion. + check(('a/b',), '', '', ('a', 'b')) + check(('a/b/',), '', '', ('a', 'b')) + check(('a', 'b/c', 'd'), '', '', ('a', 'b', 'c', 'd')) + # Collapsing and stripping excess slashes. + check(('a', 'b//c', 'd'), '', '', ('a', 'b', 'c', 'd')) + check(('a', 'b/c/', 'd'), '', '', ('a', 'b', 'c', 'd')) + # Eliminating standalone dots. + check(('.',), '', '', ()) + check(('.', '.', 'b'), '', '', ('b',)) + check(('a', '.', 'b'), '', '', ('a', 'b')) + check(('a', '.', '.'), '', '', ('a',)) + # The first part is anchored. + check(('/a/b',), '', sep, (sep, 'a', 'b')) + check(('/a', 'b'), '', sep, (sep, 'a', 'b')) + check(('/a/', 'b'), '', sep, (sep, 'a', 'b')) + # Ignoring parts before an anchored part. + check(('a', '/b', 'c'), '', sep, (sep, 'b', 'c')) + check(('a', '/b', '/c'), '', sep, (sep, 'c')) + def test_join_common(self): P = self.cls p = P('a/b') @@ -285,19 +242,26 @@ def test_as_uri_common(self): def test_repr_common(self): for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): - p = self.cls(pathstr) - clsname = p.__class__.__name__ - r = repr(p) - # The repr() is in the form ClassName("forward-slashes path"). - self.assertTrue(r.startswith(clsname + '('), r) - self.assertTrue(r.endswith(')'), r) - inner = r[len(clsname) + 1 : -1] - self.assertEqual(eval(inner), p.as_posix()) - # The repr() roundtrips. - q = eval(r, pathlib.__dict__) - self.assertIs(q.__class__, p.__class__) - self.assertEqual(q, p) - self.assertEqual(repr(q), r) + with self.subTest(pathstr=pathstr): + p = self.cls(pathstr) + clsname = p.__class__.__name__ + r = repr(p) + # The repr() is in the form ClassName("forward-slashes path"). + self.assertTrue(r.startswith(clsname + '('), r) + self.assertTrue(r.endswith(')'), r) + inner = r[len(clsname) + 1 : -1] + self.assertEqual(eval(inner), p.as_posix()) + + def test_repr_roundtrips(self): + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + with self.subTest(pathstr=pathstr): + p = self.cls(pathstr) + r = repr(p) + # The repr() roundtrips. + q = eval(r, pathlib.__dict__) + self.assertIs(q.__class__, p.__class__) + self.assertEqual(q, p) + self.assertEqual(repr(q), r) def test_eq_common(self): P = self.cls @@ -349,6 +313,15 @@ def test_match_common(self): # Multi-part glob-style pattern. self.assertFalse(P('/a/b/c.py').match('/**/*.py')) self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) + # Case-sensitive flag + self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) + # Matching against empty path + self.assertFalse(P().match('*')) + self.assertTrue(P().match('**')) + self.assertFalse(P().match('**/*')) def test_ordering_common(self): # Ordering is tuple-alike. @@ -385,8 +358,6 @@ def test_parts_common(self): p = P('a/b') parts = p.parts self.assertEqual(parts, ('a', 'b')) - # The object gets reused. - self.assertIs(parts, p.parts) # When the path is absolute, the anchor is a separate part. p = P('/a/b') parts = p.parts @@ -576,6 +547,7 @@ def test_with_name_common(self): self.assertRaises(ValueError, P('.').with_name, 'd.xml') self.assertRaises(ValueError, P('/').with_name, 'd.xml') self.assertRaises(ValueError, P('a/b').with_name, '') + self.assertRaises(ValueError, P('a/b').with_name, '.') self.assertRaises(ValueError, P('a/b').with_name, '/c') self.assertRaises(ValueError, P('a/b').with_name, 'c/') self.assertRaises(ValueError, P('a/b').with_name, 'c/d') @@ -593,6 +565,7 @@ def test_with_stem_common(self): self.assertRaises(ValueError, P('.').with_stem, 'd') self.assertRaises(ValueError, P('/').with_stem, 'd') self.assertRaises(ValueError, P('a/b').with_stem, '') + self.assertRaises(ValueError, P('a/b').with_stem, '.') self.assertRaises(ValueError, P('a/b').with_stem, '/c') self.assertRaises(ValueError, P('a/b').with_stem, 'c/') self.assertRaises(ValueError, P('a/b').with_stem, 'c/d') @@ -634,13 +607,36 @@ def test_relative_to_common(self): self.assertEqual(p.relative_to('a/'), P('b')) self.assertEqual(p.relative_to(P('a/b')), P()) self.assertEqual(p.relative_to('a/b'), P()) + self.assertEqual(p.relative_to(P(), walk_up=True), P('a/b')) + self.assertEqual(p.relative_to('', walk_up=True), P('a/b')) + self.assertEqual(p.relative_to(P('a'), walk_up=True), P('b')) + self.assertEqual(p.relative_to('a', walk_up=True), P('b')) + self.assertEqual(p.relative_to('a/', walk_up=True), P('b')) + self.assertEqual(p.relative_to(P('a/b'), walk_up=True), P()) + self.assertEqual(p.relative_to('a/b', walk_up=True), P()) + self.assertEqual(p.relative_to(P('a/c'), walk_up=True), P('../b')) + self.assertEqual(p.relative_to('a/c', walk_up=True), P('../b')) + self.assertEqual(p.relative_to(P('a/b/c'), walk_up=True), P('..')) + self.assertEqual(p.relative_to('a/b/c', walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('c'), walk_up=True), P('../a/b')) + self.assertEqual(p.relative_to('c', walk_up=True), P('../a/b')) # With several args. - self.assertEqual(p.relative_to('a', 'b'), P()) + with self.assertWarns(DeprecationWarning): + p.relative_to('a', 'b') + p.relative_to('a', 'b', walk_up=True) # Unrelated paths. self.assertRaises(ValueError, p.relative_to, P('c')) self.assertRaises(ValueError, p.relative_to, P('a/b/c')) self.assertRaises(ValueError, p.relative_to, P('a/c')) self.assertRaises(ValueError, p.relative_to, P('/a')) + self.assertRaises(ValueError, p.relative_to, P("../a")) + self.assertRaises(ValueError, p.relative_to, P("a/..")) + self.assertRaises(ValueError, p.relative_to, P("/a/..")) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/a'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) p = P('/a/b') self.assertEqual(p.relative_to(P('/')), P('a/b')) self.assertEqual(p.relative_to('/'), P('a/b')) @@ -649,6 +645,19 @@ def test_relative_to_common(self): self.assertEqual(p.relative_to('/a/'), P('b')) self.assertEqual(p.relative_to(P('/a/b')), P()) self.assertEqual(p.relative_to('/a/b'), P()) + self.assertEqual(p.relative_to(P('/'), walk_up=True), P('a/b')) + self.assertEqual(p.relative_to('/', walk_up=True), P('a/b')) + self.assertEqual(p.relative_to(P('/a'), walk_up=True), P('b')) + self.assertEqual(p.relative_to('/a', walk_up=True), P('b')) + self.assertEqual(p.relative_to('/a/', walk_up=True), P('b')) + self.assertEqual(p.relative_to(P('/a/b'), walk_up=True), P()) + self.assertEqual(p.relative_to('/a/b', walk_up=True), P()) + self.assertEqual(p.relative_to(P('/a/c'), walk_up=True), P('../b')) + self.assertEqual(p.relative_to('/a/c', walk_up=True), P('../b')) + self.assertEqual(p.relative_to(P('/a/b/c'), walk_up=True), P('..')) + self.assertEqual(p.relative_to('/a/b/c', walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('/c'), walk_up=True), P('../a/b')) + self.assertEqual(p.relative_to('/c', walk_up=True), P('../a/b')) # Unrelated paths. self.assertRaises(ValueError, p.relative_to, P('/c')) self.assertRaises(ValueError, p.relative_to, P('/a/b/c')) @@ -656,6 +665,14 @@ def test_relative_to_common(self): self.assertRaises(ValueError, p.relative_to, P()) self.assertRaises(ValueError, p.relative_to, '') self.assertRaises(ValueError, p.relative_to, P('a')) + self.assertRaises(ValueError, p.relative_to, P("../a")) + self.assertRaises(ValueError, p.relative_to, P("a/..")) + self.assertRaises(ValueError, p.relative_to, P("/a/..")) + self.assertRaises(ValueError, p.relative_to, P(''), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('a'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) def test_is_relative_to_common(self): P = self.cls @@ -669,7 +686,8 @@ def test_is_relative_to_common(self): self.assertTrue(p.is_relative_to(P('a/b'))) self.assertTrue(p.is_relative_to('a/b')) # With several args. - self.assertTrue(p.is_relative_to('a', 'b')) + with self.assertWarns(DeprecationWarning): + p.is_relative_to('a', 'b') # Unrelated paths. self.assertFalse(p.is_relative_to(P('c'))) self.assertFalse(p.is_relative_to(P('a/b/c'))) @@ -706,6 +724,18 @@ def test_pickling_common(self): class PurePosixPathTest(_BasePurePathTest, unittest.TestCase): cls = pathlib.PurePosixPath + def test_drive_root_parts(self): + check = self._check_drive_root_parts + # Collapsing of excess leading slashes, except for the double-slash + # special case. + check(('//a', 'b'), '', '//', ('//', 'a', 'b')) + check(('///a', 'b'), '', '/', ('/', 'a', 'b')) + check(('////a', 'b'), '', '/', ('/', 'a', 'b')) + # Paths which look like NT paths aren't treated specially. + check(('c:a',), '', '', ('c:a',)) + check(('c:\\a',), '', '', ('c:\\a',)) + check(('\\a',), '', '', ('\\a',)) + def test_root(self): P = self.cls self.assertEqual(P('/a/b').root, '/') @@ -778,13 +808,20 @@ def test_div(self): pp = P('//a') / '/c' self.assertEqual(pp, P('/c')) + def test_parse_windows_path(self): + P = self.cls + p = P('c:', 'a', 'b') + pp = P(pathlib.PureWindowsPath('c:\\a\\b')) + self.assertEqual(p, pp) + class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): cls = pathlib.PureWindowsPath equivalences = _BasePurePathTest.equivalences.copy() equivalences.update({ - 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a') ], + './a:b': [ ('./a:b',) ], + 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ], 'c:/a': [ ('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'), ('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'), @@ -795,6 +832,68 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): ], }) + def test_drive_root_parts(self): + check = self._check_drive_root_parts + # First part is anchored. + check(('c:',), 'c:', '', ('c:',)) + check(('c:/',), 'c:', '\\', ('c:\\',)) + check(('/',), '', '\\', ('\\',)) + check(('c:a',), 'c:', '', ('c:', 'a')) + check(('c:/a',), 'c:', '\\', ('c:\\', 'a')) + check(('/a',), '', '\\', ('\\', 'a')) + # UNC paths. + check(('//',), '\\\\', '', ('\\\\',)) + check(('//a',), '\\\\a', '', ('\\\\a',)) + check(('//a/',), '\\\\a\\', '', ('\\\\a\\',)) + check(('//a/b',), '\\\\a\\b', '\\', ('\\\\a\\b\\',)) + check(('//a/b/',), '\\\\a\\b', '\\', ('\\\\a\\b\\',)) + check(('//a/b/c',), '\\\\a\\b', '\\', ('\\\\a\\b\\', 'c')) + # Second part is anchored, so that the first part is ignored. + check(('a', 'Z:b', 'c'), 'Z:', '', ('Z:', 'b', 'c')) + check(('a', 'Z:/b', 'c'), 'Z:', '\\', ('Z:\\', 'b', 'c')) + # UNC paths. + check(('a', '//b/c', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) + # Collapsing and stripping excess slashes. + check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd')) + # UNC paths. + check(('a', '//b/c//', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) + # Extended paths. + check(('//./c:',), '\\\\.\\c:', '', ('\\\\.\\c:',)) + check(('//?/c:/',), '\\\\?\\c:', '\\', ('\\\\?\\c:\\',)) + check(('//?/c:/a',), '\\\\?\\c:', '\\', ('\\\\?\\c:\\', 'a')) + check(('//?/c:/a', '/b'), '\\\\?\\c:', '\\', ('\\\\?\\c:\\', 'b')) + # Extended UNC paths (format is "\\?\UNC\server\share"). + check(('//?',), '\\\\?', '', ('\\\\?',)) + check(('//?/',), '\\\\?\\', '', ('\\\\?\\',)) + check(('//?/UNC',), '\\\\?\\UNC', '', ('\\\\?\\UNC',)) + check(('//?/UNC/',), '\\\\?\\UNC\\', '', ('\\\\?\\UNC\\',)) + check(('//?/UNC/b',), '\\\\?\\UNC\\b', '', ('\\\\?\\UNC\\b',)) + check(('//?/UNC/b/',), '\\\\?\\UNC\\b\\', '', ('\\\\?\\UNC\\b\\',)) + check(('//?/UNC/b/c',), '\\\\?\\UNC\\b\\c', '\\', ('\\\\?\\UNC\\b\\c\\',)) + check(('//?/UNC/b/c/',), '\\\\?\\UNC\\b\\c', '\\', ('\\\\?\\UNC\\b\\c\\',)) + check(('//?/UNC/b/c/d',), '\\\\?\\UNC\\b\\c', '\\', ('\\\\?\\UNC\\b\\c\\', 'd')) + # UNC device paths + check(('//./BootPartition/',), '\\\\.\\BootPartition', '\\', ('\\\\.\\BootPartition\\',)) + check(('//?/BootPartition/',), '\\\\?\\BootPartition', '\\', ('\\\\?\\BootPartition\\',)) + check(('//./PhysicalDrive0',), '\\\\.\\PhysicalDrive0', '', ('\\\\.\\PhysicalDrive0',)) + check(('//?/Volume{}/',), '\\\\?\\Volume{}', '\\', ('\\\\?\\Volume{}\\',)) + check(('//./nul',), '\\\\.\\nul', '', ('\\\\.\\nul',)) + # Second part has a root but not drive. + check(('a', '/b', 'c'), '', '\\', ('\\', 'b', 'c')) + check(('Z:/a', '/b', 'c'), 'Z:', '\\', ('Z:\\', 'b', 'c')) + check(('//?/Z:/a', '/b', 'c'), '\\\\?\\Z:', '\\', ('\\\\?\\Z:\\', 'b', 'c')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. + check(('c:/a/b', 'c:x/y'), 'c:', '\\', ('c:\\', 'a', 'b', 'x', 'y')) + check(('c:/a/b', 'c:/x/y'), 'c:', '\\', ('c:\\', 'x', 'y')) + # Paths to files with NTFS alternate data streams + check(('./c:s',), '', '', ('c:s',)) + check(('cc:s',), '', '', ('cc:s',)) + check(('C:c:s',), 'C:', '', ('C:', 'c:s')) + check(('C:/c:s',), 'C:', '\\', ('C:\\', 'c:s')) + check(('D:a', './c:b'), 'D:', '', ('D:', 'a', 'c:b')) + check(('D:/a', './c:b'), 'D:', '\\', ('D:\\', 'a', 'c:b')) + def test_str(self): p = self.cls('a/b/c') self.assertEqual(str(p), 'a\\b\\c') @@ -808,6 +907,7 @@ def test_str(self): self.assertEqual(str(p), '\\\\a\\b\\c\\d') def test_str_subclass(self): + self._check_str_subclass('.\\a:b') self._check_str_subclass('c:') self._check_str_subclass('c:a') self._check_str_subclass('c:a\\b.txt') @@ -829,6 +929,7 @@ def test_eq(self): self.assertEqual(P('a/B'), P('A/b')) self.assertEqual(P('C:a/B'), P('c:A/b')) self.assertEqual(P('//Some/SHARE/a/B'), P('//somE/share/A/b')) + self.assertEqual(P('\u0130'), P('i\u0307')) def test_as_uri(self): P = self.cls @@ -846,11 +947,10 @@ def test_as_uri(self): self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), 'file://some/share/a/b%25%23c%C3%A9') - def test_match_common(self): + def test_match(self): P = self.cls # Absolute patterns. - self.assertTrue(P('c:/b.py').match('/*.py')) - self.assertTrue(P('c:/b.py').match('c:*.py')) + self.assertTrue(P('c:/b.py').match('*:/*.py')) self.assertTrue(P('c:/b.py').match('c:/*.py')) self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive self.assertFalse(P('b.py').match('/*.py')) @@ -861,7 +961,7 @@ def test_match_common(self): self.assertFalse(P('/b.py').match('c:*.py')) self.assertFalse(P('/b.py').match('c:/*.py')) # UNC patterns. - self.assertTrue(P('//some/share/a.py').match('/*.py')) + self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) @@ -869,6 +969,10 @@ def test_match_common(self): self.assertTrue(P('B.py').match('b.PY')) self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + # Path anchor doesn't match pattern anchor + self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' + self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' + self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' def test_ordering_common(self): # Case-insensitivity. @@ -972,6 +1076,7 @@ def test_drive(self): self.assertEqual(P('//a/b').drive, '\\\\a\\b') self.assertEqual(P('//a/b/').drive, '\\\\a\\b') self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') + self.assertEqual(P('./c:a').drive, '') def test_root(self): P = self.cls @@ -1065,8 +1170,10 @@ def test_with_name(self): self.assertRaises(ValueError, P('c:').with_name, 'd.xml') self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:') - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:e') + self.assertEqual(str(P('a').with_name('d:')), '.\\d:') + self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_name('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_name('d:e'), P('c:a/d:e')) self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') @@ -1079,8 +1186,10 @@ def test_with_stem(self): self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:') - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:e') + self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') + self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') @@ -1118,6 +1227,16 @@ def test_relative_to(self): self.assertEqual(p.relative_to('c:foO/'), P('Bar')) self.assertEqual(p.relative_to(P('c:foO/baR')), P()) self.assertEqual(p.relative_to('c:foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) + self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) # Unrelated paths. self.assertRaises(ValueError, p.relative_to, P()) self.assertRaises(ValueError, p.relative_to, '') @@ -1128,11 +1247,14 @@ def test_relative_to(self): self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) + self.assertRaises(ValueError, p.relative_to, '', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) p = P('C:/Foo/Bar') - self.assertEqual(p.relative_to(P('c:')), P('/Foo/Bar')) - self.assertEqual(p.relative_to('c:'), P('/Foo/Bar')) - self.assertEqual(str(p.relative_to(P('c:'))), '\\Foo\\Bar') - self.assertEqual(str(p.relative_to('c:')), '\\Foo\\Bar') self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) @@ -1140,7 +1262,19 @@ def test_relative_to(self): self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) self.assertEqual(p.relative_to('c:/foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) + self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, 'c:') + self.assertRaises(ValueError, p.relative_to, P('c:')) self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) @@ -1150,6 +1284,14 @@ def test_relative_to(self): self.assertRaises(ValueError, p.relative_to, P('/')) self.assertRaises(ValueError, p.relative_to, P('/Foo')) self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) + self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) # UNC paths. p = P('//Server/Share/Foo/Bar') self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) @@ -1160,11 +1302,25 @@ def test_relative_to(self): self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) # Unrelated paths. self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) def test_is_relative_to(self): P = self.cls @@ -1187,13 +1343,13 @@ def test_is_relative_to(self): self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) p = P('C:/Foo/Bar') - self.assertTrue(p.is_relative_to('c:')) self.assertTrue(p.is_relative_to(P('c:/'))) self.assertTrue(p.is_relative_to(P('c:/foO'))) self.assertTrue(p.is_relative_to('c:/foO/')) self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) self.assertTrue(p.is_relative_to('c:/foO/baR')) # Unrelated paths. + self.assertFalse(p.is_relative_to('c:')) self.assertFalse(p.is_relative_to(P('C:/Baz'))) self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) @@ -1261,6 +1417,21 @@ def test_join(self): self.assertEqual(pp, P('C:/a/b/x/y')) pp = p.joinpath('c:/x/y') self.assertEqual(pp, P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + pp = p.joinpath(P('./d:s')) + self.assertEqual(pp, P('C:/a/b/d:s')) + pp = p.joinpath(P('./dd:s')) + self.assertEqual(pp, P('C:/a/b/dd:s')) + pp = p.joinpath(P('E:d:s')) + self.assertEqual(pp, P('E:d:s')) + # Joining onto a UNC path with no root + pp = P('//').joinpath('server') + self.assertEqual(pp, P('//server')) + pp = P('//server').joinpath('share') + self.assertEqual(pp, P('//server/share')) + pp = P('//./BootPartition').joinpath('Windows') + self.assertEqual(pp, P('//./BootPartition/Windows')) def test_div(self): # Basically the same as joinpath(). @@ -1281,6 +1452,11 @@ def test_div(self): # the second path is relative. self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) self.assertEqual(p / 'c:/x/y', P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) + self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) + self.assertEqual(p / P('E:d:s'), P('E:d:s')) def test_is_reserved(self): P = self.cls @@ -1391,6 +1567,7 @@ class _BasePathTest(object): # | |-- dirD # | | `-- fileD # | `-- fileC + # | `-- novel.txt # |-- dirE # No permissions # |-- fileA # |-- linkA -> fileA @@ -1415,6 +1592,8 @@ def cleanup(): f.write(b"this is file B\n") with open(join('dirC', 'fileC'), 'wb') as f: f.write(b"this is file C\n") + with open(join('dirC', 'novel.txt'), 'wb') as f: + f.write(b"this is a novel\n") with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: f.write(b"this is file D\n") os.chmod(join('dirE'), 0) @@ -1461,6 +1640,28 @@ def test_cwd(self): p = self.cls.cwd() self._test_cwd(p) + def test_absolute_common(self): + P = self.cls + + with mock.patch("os.getcwd") as getcwd: + getcwd.return_value = BASE + + # Simple relative paths. + self.assertEqual(str(P().absolute()), BASE) + self.assertEqual(str(P('.').absolute()), BASE) + self.assertEqual(str(P('a').absolute()), os.path.join(BASE, 'a')) + self.assertEqual(str(P('a', 'b', 'c').absolute()), os.path.join(BASE, 'a', 'b', 'c')) + + # Symlinks should not be resolved. + self.assertEqual(str(P('linkB', 'fileB').absolute()), os.path.join(BASE, 'linkB', 'fileB')) + self.assertEqual(str(P('brokenLink').absolute()), os.path.join(BASE, 'brokenLink')) + self.assertEqual(str(P('brokenLinkLoop').absolute()), os.path.join(BASE, 'brokenLinkLoop')) + + # '..' entries should be preserved and not normalised. + self.assertEqual(str(P('..').absolute()), os.path.join(BASE, '..')) + self.assertEqual(str(P('a', '..').absolute()), os.path.join(BASE, 'a', '..')) + self.assertEqual(str(P('..', 'b').absolute()), os.path.join(BASE, '..', 'b')) + def _test_home(self, p): q = self.cls(os.path.expanduser('~')) self.assertEqual(p, q) @@ -1468,6 +1669,9 @@ def _test_home(self, p): self.assertIs(type(p), type(q)) self.assertTrue(p.is_absolute()) + @unittest.skipIf( + pwd is None, reason="Test requires pwd module to get homedir." + ) def test_home(self): with os_helper.EnvironmentVarGuard() as env: self._test_home(self.cls.home()) @@ -1480,6 +1684,28 @@ def test_home(self): env['HOME'] = os.path.join(BASE, 'home') self._test_home(self.cls.home()) + @unittest.skip("TODO: RUSTPYTHON; PyObject::set_slot index out of bounds") + def test_with_segments(self): + class P(_BasePurePathSubclass, self.cls): + pass + p = P(BASE, session_id=42) + self.assertEqual(42, p.absolute().session_id) + self.assertEqual(42, p.resolve().session_id) + if not is_wasi: # WASI has no user accounts. + self.assertEqual(42, p.with_segments('~').expanduser().session_id) + self.assertEqual(42, (p / 'fileA').rename(p / 'fileB').session_id) + self.assertEqual(42, (p / 'fileB').replace(p / 'fileA').session_id) + if os_helper.can_symlink(): + self.assertEqual(42, (p / 'linkA').readlink().session_id) + for path in p.iterdir(): + self.assertEqual(42, path.session_id) + for path in p.glob('*'): + self.assertEqual(42, path.session_id) + for path in p.rglob('*'): + self.assertEqual(42, path.session_id) + for dirpath, dirnames, filenames in p.walk(): + self.assertEqual(42, dirpath.session_id) + def test_samefile(self): fileA_path = os.path.join(BASE, 'fileA') fileB_path = os.path.join(BASE, 'dirB', 'fileB') @@ -1505,6 +1731,7 @@ def test_empty_path(self): p = self.cls('') self.assertEqual(p.stat(), os.stat('.')) + @unittest.skipIf(is_wasi, "WASI has no user accounts.") def test_expanduser_common(self): P = self.cls p = P('~') @@ -1517,6 +1744,8 @@ def test_expanduser_common(self): self.assertEqual(p.expanduser(), p) p = P(P('').absolute().anchor) / '~' self.assertEqual(p.expanduser(), p) + p = P('~/a:b') + self.assertEqual(p.expanduser(), P(os.path.expanduser('~'), './a:b')) def test_exists(self): P = self.cls @@ -1530,6 +1759,8 @@ def test_exists(self): self.assertIs(True, (p / 'linkB').exists()) self.assertIs(True, (p / 'linkB' / 'fileB').exists()) self.assertIs(False, (p / 'linkA' / 'bah').exists()) + self.assertIs(False, (p / 'brokenLink').exists()) + self.assertIs(True, (p / 'brokenLink').exists(follow_symlinks=False)) self.assertIs(False, (p / 'foo').exists()) self.assertIs(False, P('/xyzzy').exists()) self.assertIs(False, P(BASE + '\udfff').exists()) @@ -1636,16 +1867,36 @@ def _check(glob, expected): _check(p.glob("*/fileB"), ['dirB/fileB']) else: _check(p.glob("*/fileB"), ['dirB/fileB', 'linkB/fileB']) + if os_helper.can_symlink(): + _check(p.glob("brokenLink"), ['brokenLink']) + + if not os_helper.can_symlink(): + _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE"]) + else: + _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"]) + + def test_glob_case_sensitive(self): + P = self.cls + def _check(path, pattern, case_sensitive, expected): + actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)} + expected = {str(P(BASE, q)) for q in expected} + self.assertEqual(actual, expected) + path = P(BASE) + _check(path, "DIRB/FILE*", True, []) + _check(path, "DIRB/FILE*", False, ["dirB/fileB"]) + _check(path, "dirb/file*", True, []) + _check(path, "dirb/file*", False, ["dirB/fileB"]) def test_rglob_common(self): def _check(glob, expected): - self.assertEqual(set(glob), { P(BASE, q) for q in expected }) + self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected)) P = self.cls p = P(BASE) it = p.rglob("fileA") self.assertIsInstance(it, collections.abc.Iterator) _check(it, ["fileA"]) _check(p.rglob("fileB"), ["dirB/fileB"]) + _check(p.rglob("**/fileB"), ["dirB/fileB"]) _check(p.rglob("*/fileA"), []) if not os_helper.can_symlink(): _check(p.rglob("*/fileB"), ["dirB/fileB"]) @@ -1654,9 +1905,30 @@ def _check(glob, expected): "linkB/fileB", "dirA/linkC/fileB"]) _check(p.rglob("file*"), ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD"]) + if not os_helper.can_symlink(): + _check(p.rglob("*/"), [ + "dirA", "dirB", "dirC", "dirC/dirD", "dirE", + ]) + else: + _check(p.rglob("*/"), [ + "dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC", + "dirC/dirD", "dirE", "linkB", + ]) + _check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + p = P(BASE, "dirC") + _check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("dir*/**"), ["dirC/dirD"]) _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) + _check(p.rglob("*/"), ["dirC/dirD"]) + _check(p.rglob(""), ["dirC", "dirC/dirD"]) + _check(p.rglob("**"), ["dirC", "dirC/dirD"]) + # gh-91616, a re module regression + _check(p.rglob("*.txt"), ["dirC/novel.txt"]) + _check(p.rglob("*.*"), ["dirC/novel.txt"]) @os_helper.skip_unless_symlink def test_rglob_symlink_loop(self): @@ -1667,7 +1939,8 @@ def test_rglob_symlink_loop(self): expect = {'brokenLink', 'dirA', 'dirA/linkC', 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', 'dirE', 'fileA', 'linkA', @@ -1698,8 +1971,13 @@ def test_glob_dotdot(self): P = self.cls p = P(BASE) self.assertEqual(set(p.glob("..")), { P(BASE, "..") }) + self.assertEqual(set(p.glob("../..")), { P(BASE, "..", "..") }) + self.assertEqual(set(p.glob("dirA/..")), { P(BASE, "dirA", "..") }) self.assertEqual(set(p.glob("dirA/../file*")), { P(BASE, "dirA/../fileA") }) + self.assertEqual(set(p.glob("dirA/../file*/..")), set()) self.assertEqual(set(p.glob("../xyzzy")), set()) + self.assertEqual(set(p.glob("xyzzy/..")), set()) + self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(BASE, *[".."] * 50)}) @os_helper.skip_unless_symlink def test_glob_permissions(self): @@ -1707,36 +1985,39 @@ def test_glob_permissions(self): P = self.cls base = P(BASE) / 'permissions' base.mkdir() + self.addCleanup(os_helper.rmtree, base) - file1 = base / "file1" - file1.touch() - file2 = base / "file2" - file2.touch() - - subdir = base / "subdir" - - file3 = base / "file3" - file3.symlink_to(subdir / "other") - - # Patching is needed to avoid relying on the filesystem - # to return the order of the files as the error will not - # happen if the symlink is the last item. - - with mock.patch("os.scandir") as scandir: - scandir.return_value = sorted(os.scandir(base)) - self.assertEqual(len(set(base.glob("*"))), 3) + for i in range(100): + link = base / f"link{i}" + if i % 2: + link.symlink_to(P(BASE, "dirE", "nonexistent")) + else: + link.symlink_to(P(BASE, "dirC")) - subdir.mkdir() + self.assertEqual(len(set(base.glob("*"))), 100) + self.assertEqual(len(set(base.glob("*/"))), 50) + self.assertEqual(len(set(base.glob("*/fileC"))), 50) + self.assertEqual(len(set(base.glob("*/file*"))), 50) - with mock.patch("os.scandir") as scandir: - scandir.return_value = sorted(os.scandir(base)) - self.assertEqual(len(set(base.glob("*"))), 4) + @os_helper.skip_unless_symlink + def test_glob_long_symlink(self): + # See gh-87695 + base = self.cls(BASE) / 'long_symlink' + base.mkdir() + bad_link = base / 'bad_link' + bad_link.symlink_to("bad" * 200) + self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - subdir.chmod(000) + def test_glob_above_recursion_limit(self): + recursion_limit = 50 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 + base = pathlib.Path(os_helper.TESTFN, 'deep') + path = pathlib.Path(base, *(['d'] * directory_depth)) + path.mkdir(parents=True) - with mock.patch("os.scandir") as scandir: - scandir.return_value = sorted(os.scandir(base)) - self.assertEqual(len(set(base.glob("*"))), 4) + with set_recursion_limit(recursion_limit): + list(base.glob('**')) def _check_resolve(self, p, expected, strict=True): q = p.resolve(strict) @@ -1808,7 +2089,7 @@ def test_resolve_common(self): @os_helper.skip_unless_symlink def test_resolve_dot(self): - # See https://bitbucket.org/pitrou/pathlib/issue/9/pathresolve-fails-on-complex-symlinks + # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ p = self.cls(BASE) self.dirlink('.', join('0')) self.dirlink(os.path.join('0', '0'), join('1')) @@ -1835,8 +2116,10 @@ def test_with(self): it = p.iterdir() it2 = p.iterdir() next(it2) - with p: - pass + # bpo-46556: path context managers are deprecated in Python 3.11. + with self.assertWarns(DeprecationWarning): + with p: + pass # Using a path as a context manager is a no-op, thus the following # operations should still succeed after the context manage exits. next(it) @@ -1844,9 +2127,11 @@ def test_with(self): p.exists() p.resolve() p.absolute() - with p: - pass + with self.assertWarns(DeprecationWarning): + with p: + pass + @os_helper.skip_unless_working_chmod def test_chmod(self): p = self.cls(BASE) / 'fileA' mode = p.stat().st_mode @@ -1861,6 +2146,7 @@ def test_chmod(self): # On Windows, os.chmod does not follow symlinks (issue #15411) @only_posix + @os_helper.skip_unless_working_chmod def test_chmod_follow_symlinks_true(self): p = self.cls(BASE) / 'linkA' q = p.resolve() @@ -1876,6 +2162,7 @@ def test_chmod_follow_symlinks_true(self): # XXX also need a test for lchmod. + @os_helper.skip_unless_working_chmod def test_stat(self): p = self.cls(BASE) / 'fileA' st = p.stat() @@ -1948,28 +2235,6 @@ def test_rmdir(self): self.assertFileNotFound(p.stat) self.assertFileNotFound(p.unlink) - @unittest.skipUnless(hasattr(os, "link"), "os.link() is not present") - def test_link_to(self): - P = self.cls(BASE) - p = P / 'fileA' - size = p.stat().st_size - # linking to another path. - q = P / 'dirA' / 'fileAA' - try: - with self.assertWarns(DeprecationWarning): - p.link_to(q) - except PermissionError as e: - self.skipTest('os.link(): %s' % e) - self.assertEqual(q.stat().st_size, size) - self.assertEqual(os.path.samefile(p, q), True) - self.assertTrue(p.stat) - # Linking to a str of a relative path. - r = rel_join('fileAAA') - with self.assertWarns(DeprecationWarning): - q.link_to(r) - self.assertEqual(os.stat(r).st_size, size) - self.assertTrue(q.stat) - @unittest.skipUnless(hasattr(os, "link"), "os.link() is not present") def test_hardlink_to(self): P = self.cls(BASE) @@ -1995,7 +2260,7 @@ def test_link_to_not_implemented(self): # linking to another path. q = P / 'dirA' / 'fileAA' with self.assertRaises(NotImplementedError): - p.link_to(q) + q.hardlink_to(p) def test_rename(self): P = self.cls(BASE) @@ -2137,6 +2402,7 @@ def test_mkdir_exist_ok_with_parent(self): self.assertTrue(p.exists()) self.assertEqual(p.stat().st_ctime, st_ctime_first) + @unittest.skipIf(is_emscripten, "FS root cannot be modified on Emscripten.") def test_mkdir_exist_ok_root(self): # Issue #25803: A drive root could raise PermissionError on Windows. self.cls('/').resolve().mkdir(exist_ok=True) @@ -2182,6 +2448,7 @@ def test_mkdir_concurrent_parent_creation(self): p = self.cls(BASE, 'dirCPC%d' % pattern_num) self.assertFalse(p.exists()) + real_mkdir = os.mkdir def my_mkdir(path, mode=0o777): path = str(path) # Emulate another process that would create the directory @@ -2190,15 +2457,15 @@ def my_mkdir(path, mode=0o777): # function is called at most 5 times (dirCPC/dir1/dir2, # dirCPC/dir1, dirCPC, dirCPC/dir1, dirCPC/dir1/dir2). if pattern.pop(): - os.mkdir(path, mode) # From another process. + real_mkdir(path, mode) # From another process. concurrently_created.add(path) - os.mkdir(path, mode) # Our real call. + real_mkdir(path, mode) # Our real call. pattern = [bool(pattern_num & (1 << n)) for n in range(5)] concurrently_created = set() p12 = p / 'dir1' / 'dir2' try: - with mock.patch("pathlib._normal_accessor.mkdir", my_mkdir): + with mock.patch("os.mkdir", my_mkdir): p12.mkdir(parents=True, exist_ok=False) except FileExistsError: self.assertIn(str(p12), concurrently_created) @@ -2256,10 +2523,12 @@ def test_is_file(self): self.assertIs((P / 'fileA\udfff').is_file(), False) self.assertIs((P / 'fileA\x00').is_file(), False) - @only_posix def test_is_mount(self): P = self.cls(BASE) - R = self.cls('/') # TODO: Work out Windows. + if os.name == 'nt': + R = self.cls('c:\\') + else: + R = self.cls('/') self.assertFalse((P / 'fileA').is_mount()) self.assertFalse((P / 'dirA').is_mount()) self.assertFalse((P / 'non-existing').is_mount()) @@ -2267,8 +2536,7 @@ def test_is_mount(self): self.assertTrue(R.is_mount()) if os_helper.can_symlink(): self.assertFalse((P / 'linkA').is_mount()) - self.assertIs(self.cls('/\udfff').is_mount(), False) - self.assertIs(self.cls('/\x00').is_mount(), False) + self.assertIs((R / '\udfff').is_mount(), False) def test_is_symlink(self): P = self.cls(BASE) @@ -2286,6 +2554,13 @@ def test_is_symlink(self): self.assertIs((P / 'linkA\udfff').is_file(), False) self.assertIs((P / 'linkA\x00').is_file(), False) + def test_is_junction(self): + P = self.cls(BASE) + + with mock.patch.object(P._flavour, 'isjunction'): + self.assertEqual(P.is_junction(), P._flavour.isjunction.return_value) + P._flavour.isjunction.assert_called_once_with(P) + def test_is_fifo_false(self): P = self.cls(BASE) self.assertFalse((P / 'fileA').is_fifo()) @@ -2320,6 +2595,12 @@ def test_is_socket_false(self): self.assertIs((P / 'fileA\x00').is_socket(), False) @unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required") + @unittest.skipIf( + is_emscripten, "Unix sockets are not implemented on Emscripten." + ) + @unittest.skipIf( + is_wasi, "Cannot create socket on WASI." + ) def test_is_socket_true(self): P = self.cls(BASE, 'mysock') sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) @@ -2355,15 +2636,15 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\x00').is_char_device(), False) def test_is_char_device_true(self): - # Under Unix, /dev/null should generally be a char device. - P = self.cls('/dev/null') + # os.devnull should generally be a char device. + P = self.cls(os.devnull) if not P.exists(): - self.skipTest("/dev/null required") + self.skipTest("null device required") self.assertTrue(P.is_char_device()) self.assertFalse(P.is_block_device()) self.assertFalse(P.is_file()) - self.assertIs(self.cls('/dev/null\udfff').is_char_device(), False) - self.assertIs(self.cls('/dev/null\x00').is_char_device(), False) + self.assertIs(self.cls(f'{os.devnull}\udfff').is_char_device(), False) + self.assertIs(self.cls(f'{os.devnull}\x00').is_char_device(), False) def test_pickling_common(self): p = self.cls(BASE, 'fileA') @@ -2434,13 +2715,238 @@ def test_complex_symlinks_relative(self): def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) + def test_passing_kwargs_deprecated(self): + with self.assertWarns(DeprecationWarning): + self.cls(foo="bar") + + +class WalkTests(unittest.TestCase): + + def setUp(self): + self.addCleanup(os_helper.rmtree, os_helper.TESTFN) + + # Build: + # TESTFN/ + # TEST1/ a file kid and two directory kids + # tmp1 + # SUB1/ a file kid and a directory kid + # tmp2 + # SUB11/ no kids + # SUB2/ a file kid and a dirsymlink kid + # tmp3 + # SUB21/ not readable + # tmp5 + # link/ a symlink to TEST2 + # broken_link + # broken_link2 + # broken_link3 + # TEST2/ + # tmp4 a lone file + self.walk_path = pathlib.Path(os_helper.TESTFN, "TEST1") + self.sub1_path = self.walk_path / "SUB1" + self.sub11_path = self.sub1_path / "SUB11" + self.sub2_path = self.walk_path / "SUB2" + sub21_path= self.sub2_path / "SUB21" + tmp1_path = self.walk_path / "tmp1" + tmp2_path = self.sub1_path / "tmp2" + tmp3_path = self.sub2_path / "tmp3" + tmp5_path = sub21_path / "tmp3" + self.link_path = self.sub2_path / "link" + t2_path = pathlib.Path(os_helper.TESTFN, "TEST2") + tmp4_path = pathlib.Path(os_helper.TESTFN, "TEST2", "tmp4") + broken_link_path = self.sub2_path / "broken_link" + broken_link2_path = self.sub2_path / "broken_link2" + broken_link3_path = self.sub2_path / "broken_link3" + + os.makedirs(self.sub11_path) + os.makedirs(self.sub2_path) + os.makedirs(sub21_path) + os.makedirs(t2_path) + + for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path, tmp5_path: + with open(path, "x", encoding='utf-8') as f: + f.write(f"I'm {path} and proud of it. Blame test_pathlib.\n") + + if os_helper.can_symlink(): + os.symlink(os.path.abspath(t2_path), self.link_path) + os.symlink('broken', broken_link_path, True) + os.symlink(pathlib.Path('tmp3', 'broken'), broken_link2_path, True) + os.symlink(pathlib.Path('SUB21', 'tmp5'), broken_link3_path, True) + self.sub2_tree = (self.sub2_path, ["SUB21"], + ["broken_link", "broken_link2", "broken_link3", + "link", "tmp3"]) + else: + self.sub2_tree = (self.sub2_path, ["SUB21"], ["tmp3"]) + + if not is_emscripten: + # Emscripten fails with inaccessible directories. + os.chmod(sub21_path, 0) + try: + os.listdir(sub21_path) + except PermissionError: + self.addCleanup(os.chmod, sub21_path, stat.S_IRWXU) + else: + os.chmod(sub21_path, stat.S_IRWXU) + os.unlink(tmp5_path) + os.rmdir(sub21_path) + del self.sub2_tree[1][:1] + + def test_walk_topdown(self): + walker = self.walk_path.walk() + entry = next(walker) + entry[1].sort() # Ensure we visit SUB1 before SUB2 + self.assertEqual(entry, (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + entry = next(walker) + self.assertEqual(entry, (self.sub1_path, ["SUB11"], ["tmp2"])) + entry = next(walker) + self.assertEqual(entry, (self.sub11_path, [], [])) + entry = next(walker) + entry[1].sort() + entry[2].sort() + self.assertEqual(entry, self.sub2_tree) + with self.assertRaises(StopIteration): + next(walker) + + def test_walk_prune(self, walk_path=None): + if walk_path is None: + walk_path = self.walk_path + # Prune the search. + all = [] + for root, dirs, files in walk_path.walk(): + all.append((root, dirs, files)) + if 'SUB1' in dirs: + # Note that this also mutates the dirs we appended to all! + dirs.remove('SUB1') + + self.assertEqual(len(all), 2) + self.assertEqual(all[0], (self.walk_path, ["SUB2"], ["tmp1"])) + + all[1][-1].sort() + all[1][1].sort() + self.assertEqual(all[1], self.sub2_tree) + + def test_file_like_path(self): + self.test_walk_prune(FakePath(self.walk_path).__fspath__()) + + def test_walk_bottom_up(self): + seen_testfn = seen_sub1 = seen_sub11 = seen_sub2 = False + for path, dirnames, filenames in self.walk_path.walk(top_down=False): + if path == self.walk_path: + self.assertFalse(seen_testfn) + self.assertTrue(seen_sub1) + self.assertTrue(seen_sub2) + self.assertEqual(sorted(dirnames), ["SUB1", "SUB2"]) + self.assertEqual(filenames, ["tmp1"]) + seen_testfn = True + elif path == self.sub1_path: + self.assertFalse(seen_testfn) + self.assertFalse(seen_sub1) + self.assertTrue(seen_sub11) + self.assertEqual(dirnames, ["SUB11"]) + self.assertEqual(filenames, ["tmp2"]) + seen_sub1 = True + elif path == self.sub11_path: + self.assertFalse(seen_sub1) + self.assertFalse(seen_sub11) + self.assertEqual(dirnames, []) + self.assertEqual(filenames, []) + seen_sub11 = True + elif path == self.sub2_path: + self.assertFalse(seen_testfn) + self.assertFalse(seen_sub2) + self.assertEqual(sorted(dirnames), sorted(self.sub2_tree[1])) + self.assertEqual(sorted(filenames), sorted(self.sub2_tree[2])) + seen_sub2 = True + else: + raise AssertionError(f"Unexpected path: {path}") + self.assertTrue(seen_testfn) + + @os_helper.skip_unless_symlink + def test_walk_follow_symlinks(self): + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with follow_symlinks=True") + + @os_helper.skip_unless_symlink + def test_walk_symlink_location(self): + # Tests whether symlinks end up in filenames or dirnames depending + # on the `follow_symlinks` argument. + walk_it = self.walk_path.walk(follow_symlinks=False) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", files) + break + else: + self.fail("symlink not found") + + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", dirs) + break + + def test_walk_bad_dir(self): + errors = [] + walk_it = self.walk_path.walk(on_error=errors.append) + root, dirs, files = next(walk_it) + self.assertEqual(errors, []) + dir1 = 'SUB1' + path1 = root / dir1 + path1new = (root / dir1).with_suffix(".new") + path1.rename(path1new) + try: + roots = [r for r, _, _ in walk_it] + self.assertTrue(errors) + self.assertNotIn(path1, roots) + self.assertNotIn(path1new, roots) + for dir2 in dirs: + if dir2 != dir1: + self.assertIn(root / dir2, roots) + finally: + path1new.rename(path1) + + def test_walk_many_open_files(self): + depth = 30 + base = pathlib.Path(os_helper.TESTFN, 'deep') + path = pathlib.Path(base, *(['d']*depth)) + path.mkdir(parents=True) + + iters = [base.walk(top_down=False) for _ in range(100)] + for i in range(depth + 1): + expected = (path, ['d'] if i else [], []) + for it in iters: + self.assertEqual(next(it), expected) + path = path.parent + + iters = [base.walk(top_down=True) for _ in range(100)] + path = base + for i in range(depth + 1): + expected = (path, ['d'] if i < depth else [], []) + for it in iters: + self.assertEqual(next(it), expected) + path = path / 'd' + + def test_walk_above_recursion_limit(self): + recursion_limit = 40 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 + base = pathlib.Path(os_helper.TESTFN, 'deep') + path = pathlib.Path(base, *(['d'] * directory_depth)) + path.mkdir(parents=True) + + with set_recursion_limit(recursion_limit): + list(base.walk()) + list(base.walk(top_down=False)) + class PathTest(_BasePathTest, unittest.TestCase): cls = pathlib.Path - def test_class_getitem(self): - self.assertIs(self.cls[str], self.cls) - def test_concrete_class(self): p = self.cls('a') self.assertIs(type(p), @@ -2462,11 +2968,26 @@ def test_glob_empty_pattern(self): class PosixPathTest(_BasePathTest, unittest.TestCase): cls = pathlib.PosixPath + def test_absolute(self): + P = self.cls + self.assertEqual(str(P('/').absolute()), '/') + self.assertEqual(str(P('/a').absolute()), '/a') + self.assertEqual(str(P('/a/b').absolute()), '/a/b') + + # '//'-prefixed absolute path (supported by POSIX). + self.assertEqual(str(P('//').absolute()), '//') + self.assertEqual(str(P('//a').absolute()), '//a') + self.assertEqual(str(P('//a/b').absolute()), '//a/b') + def _check_symlink_loop(self, *args, strict=True): path = self.cls(*args) with self.assertRaises(RuntimeError): print(path.resolve(strict)) + @unittest.skipIf( + is_emscripten or is_wasi, + "umask is not implemented on Emscripten/WASI." + ) def test_open_mode(self): old_mask = os.umask(0) self.addCleanup(os.umask, old_mask) @@ -2490,6 +3011,10 @@ def test_resolve_root(self): finally: os.chdir(current_directory) + @unittest.skipIf( + is_emscripten or is_wasi, + "umask is not implemented on Emscripten/WASI." + ) def test_touch_mode(self): old_mask = os.umask(0) self.addCleanup(os.umask, old_mask) @@ -2627,19 +3152,66 @@ def test_handling_bad_descriptor(self): class WindowsPathTest(_BasePathTest, unittest.TestCase): cls = pathlib.WindowsPath + def test_absolute(self): + P = self.cls + + # Simple absolute paths. + self.assertEqual(str(P('c:\\').absolute()), 'c:\\') + self.assertEqual(str(P('c:\\a').absolute()), 'c:\\a') + self.assertEqual(str(P('c:\\a\\b').absolute()), 'c:\\a\\b') + + # UNC absolute paths. + share = '\\\\server\\share\\' + self.assertEqual(str(P(share).absolute()), share) + self.assertEqual(str(P(share + 'a').absolute()), share + 'a') + self.assertEqual(str(P(share + 'a\\b').absolute()), share + 'a\\b') + + # UNC relative paths. + with mock.patch("os.getcwd") as getcwd: + getcwd.return_value = share + + self.assertEqual(str(P().absolute()), share) + self.assertEqual(str(P('.').absolute()), share) + self.assertEqual(str(P('a').absolute()), os.path.join(share, 'a')) + self.assertEqual(str(P('a', 'b', 'c').absolute()), + os.path.join(share, 'a', 'b', 'c')) + + drive = os.path.splitdrive(BASE)[0] + with os_helper.change_cwd(BASE): + # Relative path with root + self.assertEqual(str(P('\\').absolute()), drive + '\\') + self.assertEqual(str(P('\\foo').absolute()), drive + '\\foo') + + # Relative path on current drive + self.assertEqual(str(P(drive).absolute()), BASE) + self.assertEqual(str(P(drive + 'foo').absolute()), os.path.join(BASE, 'foo')) + + with os_helper.subst_drive(BASE) as other_drive: + # Set the working directory on the substitute drive + saved_cwd = os.getcwd() + other_cwd = f'{other_drive}\\dirA' + os.chdir(other_cwd) + os.chdir(saved_cwd) + + # Relative path on another drive + self.assertEqual(str(P(other_drive).absolute()), other_cwd) + self.assertEqual(str(P(other_drive + 'foo').absolute()), other_cwd + '\\foo') + def test_glob(self): P = self.cls p = P(BASE) self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") }) + self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") }) self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"}) + self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_rglob(self): P = self.cls p = P(BASE, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"}) + self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") }) + self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) def test_expanduser(self): P = self.cls @@ -2695,6 +3267,22 @@ def check(): check() +class PurePathSubclassTest(_BasePurePathTest, unittest.TestCase): + class cls(pathlib.PurePath): + pass + + # repr() roundtripping is not supported in custom subclass. + test_repr_roundtrips = None + + +class PathSubclassTest(_BasePathTest, unittest.TestCase): + class cls(pathlib.Path): + pass + + # repr() roundtripping is not supported in custom subclass. + test_repr_roundtrips = None + + class CompatiblePathTest(unittest.TestCase): """ Test that a type can be made compatible with PurePath