From f884b3f9757bb0386429639ebf34a21b0638eb51 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 00:17:15 +0100 Subject: [PATCH 1/2] GH-90208 - Suppress OSError exceptions from `pathlib.Path.glob()` `pathlib.Path.glob()` now suppresses all OSError exceptions, except those raised from calling `is_dir()` on the top-level path. Previously, `glob()` suppressed ENOENT, ENOTDIR, EBADF and ELOOP errors and their Windows equivalents. PermissionError was also suppressed unless it occurred when calling `is_dir()` on the top-level path. However, the selector would abort prematurely if a PermissionError was raised, and so `glob()` could return incomplete results. --- Lib/pathlib.py | 54 ++++++++----------- Lib/test/test_pathlib.py | 38 +++++-------- ...3-05-03-19-22-24.gh-issue-90208.tI00da.rst | 4 ++ 3 files changed, 39 insertions(+), 57 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-03-19-22-24.gh-issue-90208.tI00da.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8a1651c23d7f4d..c1bacd7adde85a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -130,25 +130,21 @@ def _select_from(self, parent_path, scandir): # avoid exhausting file descriptors when globbing deep trees. with scandir(parent_path) as scandir_it: entries = list(scandir_it) + except OSError: + pass + else: for entry in entries: if self.dironly: try: - # "entry.is_dir()" can raise PermissionError - # in some cases (see bpo-38894), which is not - # among the errors ignored by _ignore_error() if not entry.is_dir(): continue - except OSError as e: - if not _ignore_error(e): - raise + except OSError: continue name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) for p in self.successor._select_from(path, scandir): yield p - except PermissionError: - return class _RecursiveWildcardSelector(_Selector): @@ -163,34 +159,30 @@ def _iterate_directories(self, parent_path, scandir): # avoid exhausting file descriptors when globbing deep trees. with scandir(parent_path) as scandir_it: entries = list(scandir_it) + except OSError: + pass + else: for entry in entries: - entry_is_dir = False try: - entry_is_dir = entry.is_dir() - except OSError as e: - if not _ignore_error(e): - raise - if entry_is_dir and not entry.is_symlink(): - path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, scandir): - yield p - except PermissionError: - return + if not entry.is_dir(follow_symlinks=False): + continue + except OSError: + continue + path = parent_path._make_child_relpath(entry.name) + for p in self._iterate_directories(path, scandir): + yield p def _select_from(self, parent_path, scandir): + yielded = set() try: - yielded = set() - try: - successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, scandir): - for p in successor_select(starting_point, scandir): - if p not in yielded: - yield p - yielded.add(p) - finally: - yielded.clear() - except PermissionError: - return + successor_select = self.successor._select_from + for starting_point in self._iterate_directories(parent_path, scandir): + for p in successor_select(starting_point, scandir): + if p not in yielded: + yield p + yielded.add(p) + finally: + yielded.clear() # diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 424bb92a87d112..cdf549cec4ecea 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1910,33 +1910,19 @@ def test_glob_permissions(self): P = self.cls base = P(BASE) / 'permissions' base.mkdir() + self.addCleanup(os_helper.rmtree, base) - file1 = base / "file1" - file1.touch() - file2 = base / "file2" - file2.touch() - - subdir = base / "subdir" - - file3 = base / "file3" - file3.symlink_to(subdir / "other") - - # Patching is needed to avoid relying on the filesystem - # to return the order of the files as the error will not - # happen if the symlink is the last item. - real_scandir = os.scandir - def my_scandir(path): - with real_scandir(path) as scandir_it: - entries = list(scandir_it) - entries.sort(key=lambda entry: entry.name) - return contextlib.nullcontext(entries) - - with mock.patch("os.scandir", my_scandir): - self.assertEqual(len(set(base.glob("*"))), 3) - subdir.mkdir() - self.assertEqual(len(set(base.glob("*"))), 4) - subdir.chmod(000) - self.assertEqual(len(set(base.glob("*"))), 4) + for i in range(100): + link = base / f"link{i}" + if i % 2: + link.symlink_to(P(BASE, "dirE", "nonexistent")) + else: + link.symlink_to(P(BASE, "dirC")) + + self.assertEqual(len(set(base.glob("*"))), 100) + self.assertEqual(len(set(base.glob("*/"))), 50) + self.assertEqual(len(set(base.glob("*/fileC"))), 50) + self.assertEqual(len(set(base.glob("*/file*"))), 50) def _check_resolve(self, p, expected, strict=True): q = p.resolve(strict) diff --git a/Misc/NEWS.d/next/Library/2023-05-03-19-22-24.gh-issue-90208.tI00da.rst b/Misc/NEWS.d/next/Library/2023-05-03-19-22-24.gh-issue-90208.tI00da.rst new file mode 100644 index 00000000000000..1fd9588bebd5d8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-03-19-22-24.gh-issue-90208.tI00da.rst @@ -0,0 +1,4 @@ +Fixed issue where :meth:`pathlib.Path.glob` returned incomplete results when +it encountered a :exc:`PermissionError`. This method now suppresses all +:exc:`OSError` exceptions, except those raised from calling +:meth:`~pathlib.Path.is_dir` on the top-level path. From 556c70275a3ced025140b6851d65a07f8c2b0bb9 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 10 May 2023 18:24:19 +0100 Subject: [PATCH 2/2] Reduce diff --- Lib/pathlib.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index ede3796b73bc48..40b72930e1f08a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -175,14 +175,15 @@ def _iterate_directories(self, parent_path, scandir): pass else: for entry in entries: + entry_is_dir = False try: - if not entry.is_dir(follow_symlinks=False): - continue + entry_is_dir = entry.is_dir(follow_symlinks=False) except OSError: - continue - path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, scandir): - yield p + pass + if entry_is_dir: + path = parent_path._make_child_relpath(entry.name) + for p in self._iterate_directories(path, scandir): + yield p def _select_from(self, parent_path, scandir): successor_select = self.successor._select_from