Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
GH-101362 - Optimise pathlib by deferring path normalisation
`PurePath` now normalises and splits paths only when necessary, e.g. when
`.name` or `.parent` is accessed. The result is cached. This speeds up path
object construction by around 4x.

`PurePath.__fspath__()` now returns an unnormalised path, which should be
transparent to filesystem APIs (else pathlib's normalisation is broken!).
This extends the earlier performance improvement to most impure `Path`
methods, and also speeds up pickling, `p.joinpath('bar')` and `p / 'bar'`.

This also fixes GH-76846 and GH-85281 by unifying path constructors and
adding an `__init__()` method.
  • Loading branch information
barneygale committed Feb 4, 2023
commit b428069b063d7641d721f1cf399c086f7e3c9729
170 changes: 82 additions & 88 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ class _PathParents(Sequence):
def __init__(self, path):
# We don't store the instance to avoid reference cycles
self._pathcls = type(path)
self._drv = path._drv
self._root = path._root
self._drv = path.drive
self._root = path.root
self._parts = path._parts

def __len__(self):
Expand Down Expand Up @@ -251,33 +251,45 @@ class PurePath(object):
directly, regardless of your system.
"""
__slots__ = (
'_drv', '_root', '_parts',
'_fspath', '_drv', '_root', '_parts_cached',
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
)
_flavour = os.path

def __new__(cls, *args):
def __new__(cls, *args, **kwargs):
"""Construct a PurePath from one or several strings and or existing
PurePath objects. The strings and path objects are combined so as
to yield a canonicalized path, which is incorporated into the
new PurePath object.
"""
if cls is PurePath:
cls = PureWindowsPath if os.name == 'nt' else PurePosixPath
return cls._from_parts(args)
return super().__new__(cls)

def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, tuple(self._parts))
return (self.__class__, (self._fspath,))

def __init__(self, *args):
if not args:
path = ''
elif len(args) == 1:
path = os.fspath(args[0])
else:
path = self._flavour.join(*args)

if not isinstance(path, str):
raise TypeError(
"argument should be a str object or an os.PathLike "
"object returning str, not %r"
% type(path))
self._fspath = path

@classmethod
def _parse_parts(cls, parts):
if not parts:
def _parse_path(cls, path):
if not path:
return '', '', []
sep = cls._flavour.sep
altsep = cls._flavour.altsep
path = cls._flavour.join(*parts)
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls._flavour.splitroot(path)
Expand All @@ -288,43 +300,18 @@ def _parse_parts(cls, parts):
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
return drv, root, parsed

@classmethod
def _parse_args(cls, args):
# This is useful when you don't want to create an instance, just
# canonicalize some constructor arguments.
parts = []
for a in args:
if isinstance(a, PurePath):
parts += a._parts
else:
a = os.fspath(a)
if isinstance(a, str):
# Force-cast str subclasses to str (issue #21127)
parts.append(str(a))
else:
raise TypeError(
"argument should be a str object or an os.PathLike "
"object returning str, not %r"
% type(a))
return cls._parse_parts(parts)

@classmethod
def _from_parts(cls, args):
# We need to call _parse_args on the instance, so as to get the
# right flavour.
self = object.__new__(cls)
drv, root, parts = self._parse_args(args)
def _load_parts(self):
drv, root, parts = self._parse_path(self._fspath)
self._drv = drv
self._root = root
self._parts = parts
return self
self._parts_cached = parts

@classmethod
def _from_parsed_parts(cls, drv, root, parts):
self = object.__new__(cls)
self = cls(cls._format_parsed_parts(drv, root, parts))
self._drv = drv
self._root = root
self._parts = parts
self._parts_cached = parts
return self

@classmethod
Expand All @@ -340,12 +327,12 @@ def __str__(self):
try:
return self._str
except AttributeError:
self._str = self._format_parsed_parts(self._drv, self._root,
self._str = self._format_parsed_parts(self.drive, self.root,
self._parts) or '.'
return self._str

def __fspath__(self):
return str(self)
return self._fspath or '.'

def as_posix(self):
"""Return the string representation of the path with forward (/)
Expand All @@ -356,7 +343,7 @@ def as_posix(self):
def __bytes__(self):
"""Return the bytes representation of the path. This is only
recommended to use under Unix."""
return os.fsencode(self)
return os.fsencode(str(self))

def __repr__(self):
return "{}({!r})".format(self.__class__.__name__, self.as_posix())
Expand All @@ -366,7 +353,7 @@ def as_uri(self):
if not self.is_absolute():
raise ValueError("relative path can't be expressed as a file URI")

drive = self._drv
drive = self.drive
if len(drive) == 2 and drive[1] == ':':
# It's a path on a local drive => 'file:///c:/a/b'
prefix = 'file:///' + drive
Expand Down Expand Up @@ -422,23 +409,43 @@ def __ge__(self, other):
return NotImplemented
return self._parts_normcase >= other._parts_normcase

drive = property(attrgetter('_drv'),
doc="""The drive prefix (letter or UNC path), if any.""")
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
try:
return self._drv
except AttributeError:
self._load_parts()
return self._drv

root = property(attrgetter('_root'),
doc="""The root of the path, if any.""")
@property
def root(self):
"""The root of the path, if any."""
try:
return self._root
except AttributeError:
self._load_parts()
return self._root

@property
def _parts(self):
try:
return self._parts_cached
except AttributeError:
self._load_parts()
return self._parts_cached

@property
def anchor(self):
"""The concatenation of the drive and root, or ''."""
anchor = self._drv + self._root
anchor = self.drive + self.root
return anchor

@property
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self._drv or self._root) else 0):
if len(parts) == (1 if (self.drive or self.root) else 0):
return ''
return parts[-1]

Expand Down Expand Up @@ -487,7 +494,7 @@ def with_name(self, name):
drv, root, tail = f.splitroot(name)
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
raise ValueError("Invalid name %r" % (name))
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])

def with_stem(self, stem):
Expand All @@ -512,7 +519,7 @@ def with_suffix(self, suffix):
name = name + suffix
else:
name = name[:-len(old_suffix)] + suffix
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])

def relative_to(self, other, /, *_deprecated, walk_up=False):
Expand Down Expand Up @@ -571,22 +578,7 @@ def joinpath(self, *args):
paths) or a totally different path (if one of the arguments is
anchored).
"""
drv1, root1, parts1 = self._drv, self._root, self._parts
drv2, root2, parts2 = self._parse_args(args)
if root2:
if not drv2 and drv1:
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
else:
return self._from_parsed_parts(drv2, root2, parts2)
elif drv2:
if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
# Same drive => second path is relative to the first.
return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
else:
return self._from_parsed_parts(drv2, root2, parts2)
else:
# Second path is non-anchored (common case).
return self._from_parsed_parts(drv1, root1, parts1 + parts2)
return type(self)(self, *args)

def __truediv__(self, key):
try:
Expand All @@ -596,15 +588,15 @@ def __truediv__(self, key):

def __rtruediv__(self, key):
try:
return self._from_parts([key] + self._parts)
return type(self)(key, self)
except TypeError:
return NotImplemented

@property
def parent(self):
"""The logical parent of the path."""
drv = self._drv
root = self._root
drv = self.drive
root = self.root
parts = self._parts
if len(parts) == 1 and (drv or root):
return self
Expand All @@ -620,7 +612,7 @@ def is_absolute(self):
a drive)."""
# ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
return bool(self._drv and self._root)
return bool(self.drive and self.root)
return self._flavour.isabs(self)

def is_reserved(self):
Expand All @@ -644,12 +636,12 @@ def match(self, path_pattern):
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
drv, root, pat_parts = self._parse_path(path_pattern)
if not pat_parts:
raise ValueError("empty pattern")
elif drv and drv != self._flavour.normcase(self._drv):
elif drv and drv != self._flavour.normcase(self.drive):
return False
elif root and root != self._root:
elif root and root != self.root:
return False
parts = self._parts_normcase
if drv or root:
Expand Down Expand Up @@ -702,24 +694,26 @@ class Path(PurePath):
"""
__slots__ = ()

def __new__(cls, *args, **kwargs):
def __init__(self, *args, **kwargs):
if kwargs:
msg = ("support for supplying keyword arguments to pathlib.PurePath "
"is deprecated and scheduled for removal in Python {remove}")
warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14))
super().__init__(*args)

def __new__(cls, *args, **kwargs):
if cls is Path:
cls = WindowsPath if os.name == 'nt' else PosixPath
self = cls._from_parts(args)
if self._flavour is not os.path:
elif cls._flavour is not os.path:
raise NotImplementedError("cannot instantiate %r on your system"
% (cls.__name__,))
return self
return super().__new__(cls)

def _make_child_relpath(self, part):
# This is an optimization used for dir walking. `part` must be
# a single part relative to this path.
parts = self._parts + [part]
return self._from_parsed_parts(self._drv, self._root, parts)
return self._from_parsed_parts(self.drive, self.root, parts)

def __enter__(self):
# In previous versions of pathlib, __exit__() marked this path as
Expand Down Expand Up @@ -789,7 +783,7 @@ def glob(self, pattern):
sys.audit("pathlib.Path.glob", self, pattern)
if not pattern:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
Expand All @@ -804,7 +798,7 @@ def rglob(self, pattern):
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
Expand All @@ -821,7 +815,7 @@ def absolute(self):
"""
if self.is_absolute():
return self
return self._from_parts([os.getcwd()] + self._parts)
return type(self)(os.getcwd(), *self._parts)

def resolve(self, strict=False):
"""
Expand All @@ -839,7 +833,7 @@ def check_eloop(e):
except OSError as e:
check_eloop(e)
raise
p = self._from_parts((s,))
p = type(self)(s)

# In non-strict mode, realpath() doesn't raise on symlink loops.
# Ensure we get an exception by calling stat()
Expand Down Expand Up @@ -929,7 +923,7 @@ def readlink(self):
"""
if not hasattr(os, "readlink"):
raise NotImplementedError("os.readlink() not available on this system")
return self._from_parts((os.readlink(self),))
return type(self)(os.readlink(self))

def touch(self, mode=0o666, exist_ok=True):
"""
Expand Down Expand Up @@ -1198,12 +1192,12 @@ def expanduser(self):
""" Return a new path with expanded ~ and ~user constructs
(as returned by os.path.expanduser)
"""
if (not (self._drv or self._root) and
if (not (self.drive or self.root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.expanduser(self._parts[0])
if homedir[:1] == "~":
raise RuntimeError("Could not determine home directory.")
return self._from_parts([homedir] + self._parts[1:])
return type(self)(homedir, *self._parts[1:])

return self

Expand Down
4 changes: 3 additions & 1 deletion Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
class _BaseFlavourTest(object):

def _check_parse_parts(self, arg, expected):
f = self.cls._parse_parts
def f(parts):
path = self.flavour.join(*parts) if parts else ''
return self.cls._parse_path(path)
sep = self.flavour.sep
altsep = self.flavour.altsep
actual = f([x.replace('/', sep) for x in arg])
Expand Down