Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

GH-101362: Omit path anchor from pathlib.PurePath()._parts #102476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
171 changes: 106 additions & 65 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,20 +210,17 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
class _PathParents(Sequence):
"""This object provides sequence-like access to the logical ancestors
of a path. Don't try to construct it yourself."""
__slots__ = ('_pathcls', '_drv', '_root', '_parts')
__slots__ = ('_pathcls', '_drv', '_root', '_tail')

def __init__(self, path):
# We don't store the instance to avoid reference cycles
self._pathcls = type(path)
self._drv = path.drive
self._root = path.root
self._parts = path._parts
self._tail = path._tail

def __len__(self):
if self._drv or self._root:
return len(self._parts) - 1
else:
return len(self._parts)
return len(self._tail)

def __getitem__(self, idx):
if isinstance(idx, slice):
Expand All @@ -234,7 +231,7 @@ def __getitem__(self, idx):
if idx < 0:
idx += len(self)
return self._pathcls._from_parsed_parts(self._drv, self._root,
self._parts[:-idx - 1])
self._tail[:-idx - 1])

def __repr__(self):
return "<{}.parents>".format(self._pathcls.__name__)
Expand All @@ -249,9 +246,41 @@ class PurePath(object):
PureWindowsPath object. You can also instantiate either of these classes
directly, regardless of your system.
"""

__slots__ = (
'_raw_path', '_drv', '_root', '_parts_cached',
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
# The `_raw_path` slot stores an unnormalized string path. This is set
# in the `__init__()` method.
'_raw_path',

# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
# three-part division corresponds to the result of
# `os.path.splitroot()`, except that the tail is further split on path
# separators (i.e. it is a list of strings), and that the root and
# tail are normalized.
'_drv', '_root', '_tail_cached',

# The `_str` slot stores the string representation of the path,
# computed from the drive, root and tail when `__str__()` is called
# for the first time. It's used to implement `_str_normcase`.
'_str',

# The `_str_normcase_cached` slot stores the string path with
# normalized case. It is set when the `_str_normcase` property is
# accessed for the first time. It's used to implement `__eq__()`,
# `__hash__()`, and `_parts_normcase`.
'_str_normcase_cached',

# The `_parts_normcase_cached` slot stores the case-normalized
# string path after splitting on path separators. It's set when the
# `_parts_normcase` property is accessed for the first time. It's used
# to implement comparison methods like `__lt__()`.
'_parts_normcase_cached',

# The `_hash` slot stores the hash of the case-normalized string
# path. It's set when `__hash__()` is called for the first time.
'_hash',
)
_flavour = os.path

Expand All @@ -277,10 +306,7 @@ def __init__(self, *args):
path = os.fspath(args[0])
else:
path = self._flavour.join(*args)
if isinstance(path, str):
# Force-cast str subclasses to str (issue #21127)
path = str(path)
else:
if not isinstance(path, str):
raise TypeError(
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
Expand All @@ -299,33 +325,32 @@ def _parse_path(cls, path):
if drv.startswith(sep):
# pathlib assumes that UNC paths always have a root.
root = sep
unfiltered_parsed = [drv + root] + rel.split(sep)
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
return drv, root, parsed

def _load_parts(self):
drv, root, parts = self._parse_path(self._raw_path)
drv, root, tail = self._parse_path(self._raw_path)
self._drv = drv
self._root = root
self._parts_cached = parts
self._tail_cached = tail

@classmethod
def _from_parsed_parts(cls, drv, root, parts):
path = cls._format_parsed_parts(drv, root, parts)
def _from_parsed_parts(cls, drv, root, tail):
path = cls._format_parsed_parts(drv, root, tail)
self = cls(path)
self._str = path or '.'
self._drv = drv
self._root = root
self._parts_cached = parts
self._tail_cached = tail
return self

@classmethod
def _format_parsed_parts(cls, drv, root, parts):
def _format_parsed_parts(cls, drv, root, tail):
if drv or root:
return drv + root + cls._flavour.sep.join(parts[1:])
elif parts and cls._flavour.splitdrive(parts[0])[0]:
parts = ['.'] + parts
return cls._flavour.sep.join(parts)
return drv + root + cls._flavour.sep.join(tail)
elif tail and cls._flavour.splitdrive(tail[0])[0]:
tail = ['.'] + tail
return cls._flavour.sep.join(tail)

def __str__(self):
"""Return the string representation of the path, suitable for
Expand All @@ -334,7 +359,7 @@ def __str__(self):
return self._str
except AttributeError:
self._str = self._format_parsed_parts(self.drive, self.root,
self._parts) or '.'
self._tail) or '.'
return self._str

def __fspath__(self):
Expand Down Expand Up @@ -374,25 +399,34 @@ def as_uri(self):
path = str(self)
return prefix + urlquote_from_bytes(os.fsencode(path))

@property
def _str_normcase(self):
# String with normalized case, for hashing and equality checks
try:
return self._str_normcase_cached
except AttributeError:
self._str_normcase_cached = self._flavour.normcase(str(self))
return self._str_normcase_cached

@property
def _parts_normcase(self):
# Cached parts with normalized case, for hashing and comparison.
# Cached parts with normalized case, for comparisons.
try:
return self._parts_normcase_cached
except AttributeError:
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
return self._parts_normcase_cached

def __eq__(self, other):
if not isinstance(other, PurePath):
return NotImplemented
return self._parts_normcase == other._parts_normcase and self._flavour is other._flavour
return self._str_normcase == other._str_normcase and self._flavour is other._flavour

def __hash__(self):
try:
return self._hash
except AttributeError:
self._hash = hash(tuple(self._parts_normcase))
self._hash = hash(self._str_normcase)
return self._hash

def __lt__(self, other):
Expand Down Expand Up @@ -434,12 +468,12 @@ def root(self):
return self._root

@property
def _parts(self):
def _tail(self):
try:
return self._parts_cached
return self._tail_cached
except AttributeError:
self._load_parts()
return self._parts_cached
return self._tail_cached

@property
def anchor(self):
Expand All @@ -450,10 +484,10 @@ def anchor(self):
@property
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self.drive or self.root) else 0):
tail = self._tail
if not tail:
return ''
return parts[-1]
return tail[-1]

@property
def suffix(self):
Expand Down Expand Up @@ -501,7 +535,7 @@ def with_name(self, name):
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
raise ValueError("Invalid name %r" % (name))
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])
self._tail[:-1] + [name])

def with_stem(self, stem):
"""Return a new path with the stem changed."""
Expand All @@ -526,7 +560,7 @@ def with_suffix(self, suffix):
else:
name = name[:-len(old_suffix)] + suffix
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])
self._tail[:-1] + [name])

def relative_to(self, other, /, *_deprecated, walk_up=False):
"""Return the relative path to another path identified by the passed
Expand All @@ -551,7 +585,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors")
if step and not walk_up:
raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
parts = ('..',) * step + self.parts[len(path.parts):]
parts = ['..'] * step + self._tail[len(path._tail):]
return path_cls(*parts)

def is_relative_to(self, other, /, *_deprecated):
Expand All @@ -570,13 +604,10 @@ def is_relative_to(self, other, /, *_deprecated):
def parts(self):
"""An object providing sequence-like access to the
components in the filesystem path."""
# We cache the tuple to avoid building a new one each time .parts
# is accessed. XXX is this necessary?
try:
return self._parts_tuple
except AttributeError:
self._parts_tuple = tuple(self._parts)
return self._parts_tuple
if self.drive or self.root:
return (self.drive + self.root,) + tuple(self._tail)
else:
return tuple(self._tail)

def joinpath(self, *args):
"""Combine this path with one or several arguments, and return a
Expand All @@ -603,10 +634,10 @@ def parent(self):
"""The logical parent of the path."""
drv = self.drive
root = self.root
parts = self._parts
if len(parts) == 1 and (drv or root):
tail = self._tail
if not tail:
return self
return self._from_parsed_parts(drv, root, parts[:-1])
return self._from_parsed_parts(drv, root, tail[:-1])

@property
def parents(self):
Expand All @@ -624,29 +655,29 @@ def is_absolute(self):
def is_reserved(self):
"""Return True if the path contains one of the special names reserved
by the system, if any."""
if self._flavour is posixpath or not self._parts:
if self._flavour is posixpath or not self._tail:
return False

# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True even for some
# paths which are not considered reserved by Windows.
if self._parts[0].startswith('\\\\'):
if self.drive.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES

def match(self, path_pattern):
"""
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_path(path_pattern)
if not pat_parts:
pat = type(self)(path_pattern)
if not pat.parts:
raise ValueError("empty pattern")
pat_parts = pat._parts_normcase
parts = self._parts_normcase
if drv or root:
if pat.drive or pat.root:
if len(pat_parts) != len(parts):
return False
elif len(pat_parts) > len(parts):
Expand Down Expand Up @@ -707,11 +738,21 @@ def __new__(cls, *args, **kwargs):
cls = WindowsPath if os.name == 'nt' else PosixPath
return object.__new__(cls)

def _make_child_relpath(self, part):
# This is an optimization used for dir walking. `part` must be
# a single part relative to this path.
parts = self._parts + [part]
return self._from_parsed_parts(self.drive, self.root, parts)
def _make_child_relpath(self, name):
path_str = str(self)
tail = self._tail
if tail:
path_str = f'{path_str}{self._flavour.sep}{name}'
elif path_str != '.':
path_str = f'{path_str}{name}'
else:
path_str = name
path = type(self)(path_str)
path._str = path_str
path._drv = self.drive
path._root = self.root
path._tail_cached = tail + [name]
return path

def __enter__(self):
# In previous versions of pathlib, __exit__() marked this path as
Expand Down Expand Up @@ -1196,12 +1237,12 @@ def expanduser(self):
(as returned by os.path.expanduser)
"""
if (not (self.drive or self.root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.expanduser(self._parts[0])
self._tail and self._tail[0][:1] == '~'):
homedir = self._flavour.expanduser(self._tail[0])
if homedir[:1] == "~":
raise RuntimeError("Could not determine home directory.")
drv, root, parts = self._parse_path(homedir)
return self._from_parsed_parts(drv, root, parts + self._parts[1:])
drv, root, tail = self._parse_path(homedir)
return self._from_parsed_parts(drv, root, tail + self._tail[1:])

return self

Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,6 @@ def test_parts_common(self):
p = P('a/b')
parts = p.parts
self.assertEqual(parts, ('a', 'b'))
# The object gets reused.
self.assertIs(parts, p.parts)
# When the path is absolute, the anchor is a separate part.
p = P('/a/b')
parts = p.parts
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up :class:`pathlib.Path` construction by omitting the path anchor from
the internal list of path parts.