From 84534f9df17f8134b0329e43a8cb5d16f9551835 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 10 Apr 2024 21:33:06 +0100 Subject: [PATCH 1/2] GH-117727: Speed up `pathlib.Path.iterdir()` by using `os.scandir()` Replace use of `os.listdir()` with `os.scandir()`. Forgo setting `_drv`, `_root` and `_tail_cached`, as these usually aren't needed. Use `os.DirEntry.path` to set `_str`. --- Lib/pathlib/__init__.py | 25 +++++-------------- ...-04-10-21-30-37.gh-issue-117727.uAYNVS.rst | 2 ++ 2 files changed, 8 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-10-21-30-37.gh-issue-117727.uAYNVS.rst diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 88e3286d9b08dc..f64befd03360f2 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -584,7 +584,12 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - return (self._make_child_relpath(name) for name in os.listdir(self)) + root_dir = str(self) + with os.scandir(root_dir) as scandir_it: + paths = [entry.path for entry in scandir_it] + if root_dir == '.': + paths = map(self._remove_leading_dot, paths) + return map(self._from_parsed_string, paths) def _scandir(self): return os.scandir(self) @@ -599,24 +604,6 @@ def _make_child_direntry(self, entry): path._tail_cached = self._tail + [entry.name] return path - def _make_child_relpath(self, name): - if not name: - return self - path_str = str(self) - tail = self._tail - if tail: - path_str = f'{path_str}{self.parser.sep}{name}' - elif path_str != '.': - path_str = f'{path_str}{name}' - else: - path_str = name - path = self.with_segments(path_str) - path._str = path_str - path._drv = self.drive - path._root = self.root - path._tail_cached = tail + [name] - return path - def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. diff --git a/Misc/NEWS.d/next/Library/2024-04-10-21-30-37.gh-issue-117727.uAYNVS.rst b/Misc/NEWS.d/next/Library/2024-04-10-21-30-37.gh-issue-117727.uAYNVS.rst new file mode 100644 index 00000000000000..3a0b6834e91f18 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-10-21-30-37.gh-issue-117727.uAYNVS.rst @@ -0,0 +1,2 @@ +Speed up :meth:`pathlib.Path.iterdir` by using :func:`os.scandir` +internally. From 8bec7c88ccbc1cb9fb75fc14cec07150c8929d4e Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 12 Apr 2024 22:24:05 +0100 Subject: [PATCH 2/2] Remove errant newline --- Lib/pathlib/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 9a5bb2340f5de8..66eb08a45b1bb3 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -591,7 +591,6 @@ def iterdir(self): paths = map(self._remove_leading_dot, paths) return map(self._from_parsed_string, paths) - def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern.