From 871dbdb455ee4b3aa6f77a22613b3719b1ccf8c9 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 Apr 2023 21:16:56 +0100 Subject: [PATCH 1/7] GH-103548: Improve performance of `pathlib.Path.[is_]absolute()` Pass the *unnormalized* path to `os.path.isabs()`. When making an empty path absolute, avoid calling `os.path.join()`, and prevent pathlib from later re-normalizing the result of `os.getcwd()` by storing it as `path._str`. --- Lib/pathlib.py | 9 +++++++-- .../2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 4ae1fae6f4b358..ff72ce24a4259f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -650,7 +650,7 @@ def is_absolute(self): # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: return bool(self.drive and self.root) - return self._flavour.isabs(self) + return self._flavour.isabs(self._raw_path) def is_reserved(self): """Return True if the path contains one of the special names reserved @@ -857,8 +857,13 @@ def absolute(self): elif self.drive: # There is a CWD on each drive-letter drive. cwd = self._flavour.abspath(self.drive) - else: + elif self._tail: cwd = os.getcwd() + else: + path_str = os.getcwd() + path = type(self)(path_str) + path._str = path_str # Fully normalized string from getcwd(). + return path return type(self)(cwd, self._raw_path) def resolve(self, strict=False): diff --git a/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst b/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst new file mode 100644 index 00000000000000..3cb08ac3a8f0ac --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst @@ -0,0 +1,3 @@ +Improve performance of :meth:`pathlib.PurePath.is_absolute` and +:meth:`pathlib.Path.absolute` by working with unnormalized paths and +avoiding unnecessary joining. From 72c6652aac6c726c8920dfeedb8c036e4a487934 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 Apr 2023 22:09:05 +0100 Subject: [PATCH 2/7] Fix `absolute()` on Windows. --- Lib/pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index ff72ce24a4259f..d71a45fc83752f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -857,7 +857,7 @@ def absolute(self): elif self.drive: # There is a CWD on each drive-letter drive. cwd = self._flavour.abspath(self.drive) - elif self._tail: + elif self.root or self._tail: cwd = os.getcwd() else: path_str = os.getcwd() From 37153c5977a4a67389369aa7ad5a1ba2d6b26b94 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 Apr 2023 15:32:48 +0100 Subject: [PATCH 3/7] Simplify code --- Lib/pathlib.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index d71a45fc83752f..659e13db6a959b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -857,13 +857,12 @@ def absolute(self): elif self.drive: # There is a CWD on each drive-letter drive. cwd = self._flavour.abspath(self.drive) - elif self.root or self._tail: - cwd = os.getcwd() else: - path_str = os.getcwd() - path = type(self)(path_str) - path._str = path_str # Fully normalized string from getcwd(). - return path + cwd = os.getcwd() + if not self.root and not self._tail: + path = type(self)(cwd) + path._str = cwd # Fully normalized string from getcwd(). + return path return type(self)(cwd, self._raw_path) def resolve(self, strict=False): From cfe99fd1f2fb39f47200730abfa17a4475196550 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 May 2023 16:48:34 +0100 Subject: [PATCH 4/7] Improve news blurb --- .../Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst b/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst index 3cb08ac3a8f0ac..238f2868867472 100644 --- a/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst +++ b/Misc/NEWS.d/next/Library/2023-04-14-21-16-05.gh-issue-103548.lagdpp.rst @@ -1,3 +1,4 @@ -Improve performance of :meth:`pathlib.PurePath.is_absolute` and -:meth:`pathlib.Path.absolute` by working with unnormalized paths and -avoiding unnecessary joining. +Improve performance of :meth:`pathlib.Path.absolute` and +:meth:`~pathlib.Path.cwd` by joining paths only when necessary. Also improve +performance of :meth:`pathlib.PurePath.is_absolute` on Posix by skipping path +parsing and normalization. From 12957bf62954b25c5ebc7dad729192a5ca36f1f8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 May 2023 17:49:18 +0100 Subject: [PATCH 5/7] Suggestions from code review. --- Lib/pathlib.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e9ab2b44f89ceb..80ccef7b52e501 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -873,10 +873,14 @@ def absolute(self): cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() + # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). + # We pass only one argument to with_segments() to avoid the cost + # of joining, and we exploit the fact that getcwd() returns a + # fully-normalized string by storing it in _str. if not self.root and not self._tail: - path = self.with_segments(cwd) - path._str = cwd # Fully normalized string from getcwd(). - return path + result = self.with_segments(cwd) + result._str = cwd + return result return self.with_segments(cwd, self) def resolve(self, strict=False): From b476ce60caea53ea5ef60cdc9dff88cd7a2763f1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 May 2023 18:31:02 +0100 Subject: [PATCH 6/7] Mention Path.cwd() in comment. --- Lib/pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 80ccef7b52e501..bac20000155066 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -873,7 +873,7 @@ def absolute(self): cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() - # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). + # Fast path for "empty" paths, e.g. Path.cwd(), Path("."), Path(). # We pass only one argument to with_segments() to avoid the cost # of joining, and we exploit the fact that getcwd() returns a # fully-normalized string by storing it in _str. From b32b86c14635bab739d33e230cbe3737b8e2bbe8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 May 2023 18:34:02 +0100 Subject: [PATCH 7/7] Clarify comment --- Lib/pathlib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index bac20000155066..480c354ce8b656 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -873,10 +873,11 @@ def absolute(self): cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() - # Fast path for "empty" paths, e.g. Path.cwd(), Path("."), Path(). + # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). # We pass only one argument to with_segments() to avoid the cost # of joining, and we exploit the fact that getcwd() returns a - # fully-normalized string by storing it in _str. + # fully-normalized string by storing it in _str. This is used to + # implement Path.cwd(). if not self.root and not self._tail: result = self.with_segments(cwd) result._str = cwd