Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8a64cea

Browse files
bpo-38144: Add the root_dir and dir_fd parameters in glob.glob(). (GH-16075)
1 parent 8f192d1 commit 8a64cea

5 files changed

Lines changed: 175 additions & 47 deletions

File tree

Doc/library/glob.rst

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ For example, ``'[?]'`` matches the character ``'?'``.
3636
The :mod:`pathlib` module offers high-level path objects.
3737

3838

39-
.. function:: glob(pathname, *, recursive=False)
39+
.. function:: glob(pathname, *, root_dir=None, dir_fd=None, recursive=False)
4040

4141
Return a possibly-empty list of path names that match *pathname*, which must be
4242
a string containing a path specification. *pathname* can be either absolute
@@ -45,6 +45,15 @@ For example, ``'[?]'`` matches the character ``'?'``.
4545
symlinks are included in the results (as in the shell). Whether or not the
4646
results are sorted depends on the file system.
4747

48+
If *root_dir* is not ``None``, it should be a :term:`path-like object`
49+
specifying the root directory for searching. It has the same effect on
50+
:func:`glob` as changing the current directory before calling it. If
51+
*pathname* is relative, the result will contain paths relative to
52+
*root_dir*.
53+
54+
This function can support :ref:`paths relative to directory descriptors
55+
<dir_fd>` with the *dir_fd* parameter.
56+
4857
.. index::
4958
single: **; in glob-style wildcards
5059
@@ -62,8 +71,11 @@ For example, ``'[?]'`` matches the character ``'?'``.
6271
.. versionchanged:: 3.5
6372
Support for recursive globs using "``**``".
6473

74+
.. versionchanged:: 3.10
75+
Added the *root_dir* and *dir_fd* parameters.
76+
6577

66-
.. function:: iglob(pathname, *, recursive=False)
78+
.. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False)
6779

6880
Return an :term:`iterator` which yields the same values as :func:`glob`
6981
without actually storing them all simultaneously.

Doc/whatsnew/3.10.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ New Modules
100100
Improved Modules
101101
================
102102

103+
glob
104+
----
105+
106+
Added the *root_dir* and *dir_fd* parameters in :func:`~glob.glob` and
107+
:func:`~glob.iglob`, which allow specifying the root directory for searching.
108+
(Contributed by Serhiy Storchaka in :issue:`38144`.)
109+
110+
103111
Optimizations
104112
=============
105113

Lib/glob.py

Lines changed: 102 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import os
44
import re
55
import fnmatch
6+
import itertools
7+
import stat
68
import sys
79

810
__all__ = ["glob", "iglob", "escape"]
911

10-
def glob(pathname, *, recursive=False):
12+
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
1113
"""Return a list of paths matching a pathname pattern.
1214
1315
The pattern may contain simple shell-style wildcards a la
@@ -18,9 +20,9 @@ def glob(pathname, *, recursive=False):
1820
If recursive is true, the pattern '**' will match any files and
1921
zero or more directories and subdirectories.
2022
"""
21-
return list(iglob(pathname, recursive=recursive))
23+
return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive))
2224

23-
def iglob(pathname, *, recursive=False):
25+
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
2426
"""Return an iterator which yields the paths matching a pathname pattern.
2527
2628
The pattern may contain simple shell-style wildcards a la
@@ -31,36 +33,43 @@ def iglob(pathname, *, recursive=False):
3133
If recursive is true, the pattern '**' will match any files and
3234
zero or more directories and subdirectories.
3335
"""
34-
sys.audit("glob.glob", pathname, recursive)
35-
it = _iglob(pathname, recursive, False)
36-
if recursive and _isrecursive(pathname):
37-
s = next(it) # skip empty string
38-
assert not s
36+
if root_dir is not None:
37+
root_dir = os.fspath(root_dir)
38+
else:
39+
root_dir = pathname[:0]
40+
it = _iglob(pathname, root_dir, dir_fd, recursive, False)
41+
if not pathname or recursive and _isrecursive(pathname[:2]):
42+
try:
43+
s = next(it) # skip empty string
44+
if s:
45+
it = itertools.chain((s,), it)
46+
except StopIteration:
47+
pass
3948
return it
4049

41-
def _iglob(pathname, recursive, dironly):
50+
def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
4251
dirname, basename = os.path.split(pathname)
4352
if not has_magic(pathname):
4453
assert not dironly
4554
if basename:
46-
if os.path.lexists(pathname):
55+
if _lexists(_join(root_dir, pathname), dir_fd):
4756
yield pathname
4857
else:
4958
# Patterns ending with a slash should match only directories
50-
if os.path.isdir(dirname):
59+
if _isdir(_join(root_dir, dirname), dir_fd):
5160
yield pathname
5261
return
5362
if not dirname:
5463
if recursive and _isrecursive(basename):
55-
yield from _glob2(dirname, basename, dironly)
64+
yield from _glob2(root_dir, basename, dir_fd, dironly)
5665
else:
57-
yield from _glob1(dirname, basename, dironly)
66+
yield from _glob1(root_dir, basename, dir_fd, dironly)
5867
return
5968
# `os.path.split()` returns the argument itself as a dirname if it is a
6069
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path
6170
# contains magic characters (i.e. r'\\?\C:').
6271
if dirname != pathname and has_magic(dirname):
63-
dirs = _iglob(dirname, recursive, True)
72+
dirs = _iglob(dirname, root_dir, dir_fd, recursive, True)
6473
else:
6574
dirs = [dirname]
6675
if has_magic(basename):
@@ -71,76 +80,121 @@ def _iglob(pathname, recursive, dironly):
7180
else:
7281
glob_in_dir = _glob0
7382
for dirname in dirs:
74-
for name in glob_in_dir(dirname, basename, dironly):
83+
for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly):
7584
yield os.path.join(dirname, name)
7685

7786
# These 2 helper functions non-recursively glob inside a literal directory.
7887
# They return a list of basenames. _glob1 accepts a pattern while _glob0
7988
# takes a literal basename (so it only has to check for its existence).
8089

81-
def _glob1(dirname, pattern, dironly):
82-
names = list(_iterdir(dirname, dironly))
90+
def _glob1(dirname, pattern, dir_fd, dironly):
91+
names = list(_iterdir(dirname, dir_fd, dironly))
8392
if not _ishidden(pattern):
8493
names = (x for x in names if not _ishidden(x))
8594
return fnmatch.filter(names, pattern)
8695

87-
def _glob0(dirname, basename, dironly):
88-
if not basename:
89-
# `os.path.split()` returns an empty basename for paths ending with a
90-
# directory separator. 'q*x/' should match only directories.
91-
if os.path.isdir(dirname):
96+
def _glob0(dirname, basename, dir_fd, dironly):
97+
if basename:
98+
if _lexists(_join(dirname, basename), dir_fd):
9299
return [basename]
93100
else:
94-
if os.path.lexists(os.path.join(dirname, basename)):
101+
# `os.path.split()` returns an empty basename for paths ending with a
102+
# directory separator. 'q*x/' should match only directories.
103+
if _isdir(dirname, dir_fd):
95104
return [basename]
96105
return []
97106

98107
# Following functions are not public but can be used by third-party code.
99108

100109
def glob0(dirname, pattern):
101-
return _glob0(dirname, pattern, False)
110+
return _glob0(dirname, pattern, None, False)
102111

103112
def glob1(dirname, pattern):
104-
return _glob1(dirname, pattern, False)
113+
return _glob1(dirname, pattern, None, False)
105114

106115
# This helper function recursively yields relative pathnames inside a literal
107116
# directory.
108117

109-
def _glob2(dirname, pattern, dironly):
118+
def _glob2(dirname, pattern, dir_fd, dironly):
110119
assert _isrecursive(pattern)
111120
yield pattern[:0]
112-
yield from _rlistdir(dirname, dironly)
121+
yield from _rlistdir(dirname, dir_fd, dironly)
113122

114123
# If dironly is false, yields all file names inside a directory.
115124
# If dironly is true, yields only directory names.
116-
def _iterdir(dirname, dironly):
117-
if not dirname:
118-
if isinstance(dirname, bytes):
119-
dirname = bytes(os.curdir, 'ASCII')
120-
else:
121-
dirname = os.curdir
125+
def _iterdir(dirname, dir_fd, dironly):
122126
try:
123-
with os.scandir(dirname) as it:
124-
for entry in it:
125-
try:
126-
if not dironly or entry.is_dir():
127-
yield entry.name
128-
except OSError:
129-
pass
127+
fd = None
128+
fsencode = None
129+
if dir_fd is not None:
130+
if dirname:
131+
fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
132+
else:
133+
arg = dir_fd
134+
if isinstance(dirname, bytes):
135+
fsencode = os.fsencode
136+
elif dirname:
137+
arg = dirname
138+
elif isinstance(dirname, bytes):
139+
arg = bytes(os.curdir, 'ASCII')
140+
else:
141+
arg = os.curdir
142+
try:
143+
with os.scandir(arg) as it:
144+
for entry in it:
145+
try:
146+
if not dironly or entry.is_dir():
147+
if fsencode is not None:
148+
yield fsencode(entry.name)
149+
else:
150+
yield entry.name
151+
except OSError:
152+
pass
153+
finally:
154+
if fd is not None:
155+
os.close(fd)
130156
except OSError:
131157
return
132158

133159
# Recursively yields relative pathnames inside a literal directory.
134-
def _rlistdir(dirname, dironly):
135-
names = list(_iterdir(dirname, dironly))
160+
def _rlistdir(dirname, dir_fd, dironly):
161+
names = list(_iterdir(dirname, dir_fd, dironly))
136162
for x in names:
137163
if not _ishidden(x):
138164
yield x
139-
path = os.path.join(dirname, x) if dirname else x
140-
for y in _rlistdir(path, dironly):
141-
yield os.path.join(x, y)
165+
path = _join(dirname, x) if dirname else x
166+
for y in _rlistdir(path, dir_fd, dironly):
167+
yield _join(x, y)
142168

143169

170+
def _lexists(pathname, dir_fd):
171+
# Same as os.path.lexists(), but with dir_fd
172+
if dir_fd is None:
173+
return os.path.lexists(pathname)
174+
try:
175+
os.lstat(pathname, dir_fd=dir_fd)
176+
except (OSError, ValueError):
177+
return False
178+
else:
179+
return True
180+
181+
def _isdir(pathname, dir_fd):
182+
# Same as os.path.isdir(), but with dir_fd
183+
if dir_fd is None:
184+
return os.path.isdir(pathname)
185+
try:
186+
st = os.stat(pathname, dir_fd=dir_fd)
187+
except (OSError, ValueError):
188+
return False
189+
else:
190+
return stat.S_ISDIR(st.st_mode)
191+
192+
def _join(dirname, basename):
193+
# It is common for dirname or basename to be empty
194+
if not dirname or not basename:
195+
return dirname or basename
196+
return os.path.join(dirname, basename)
197+
144198
magic_check = re.compile('([*?[])')
145199
magic_check_bytes = re.compile(b'([*?[])')
146200

@@ -171,3 +225,6 @@ def escape(pathname):
171225
else:
172226
pathname = magic_check.sub(r'[\1]', pathname)
173227
return drive + pathname
228+
229+
230+
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)

Lib/test/test_glob.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010

1111
class GlobTests(unittest.TestCase):
12+
dir_fd = None
1213

1314
def norm(self, *parts):
1415
return os.path.normpath(os.path.join(self.tempdir, *parts))
@@ -38,8 +39,14 @@ def setUp(self):
3839
os.symlink(self.norm('broken'), self.norm('sym1'))
3940
os.symlink('broken', self.norm('sym2'))
4041
os.symlink(os.path.join('a', 'bcd'), self.norm('sym3'))
42+
if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd:
43+
self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY)
44+
else:
45+
self.dir_fd = None
4146

4247
def tearDown(self):
48+
if self.dir_fd is not None:
49+
os.close(self.dir_fd)
4350
shutil.rmtree(self.tempdir)
4451

4552
def glob(self, *parts, **kwargs):
@@ -53,6 +60,41 @@ def glob(self, *parts, **kwargs):
5360
bres = [os.fsencode(x) for x in res]
5461
self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres)
5562
self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres)
63+
64+
with change_cwd(self.tempdir):
65+
res2 = glob.glob(pattern, **kwargs)
66+
for x in res2:
67+
self.assertFalse(os.path.isabs(x), x)
68+
if pattern == '**' or pattern == '**' + os.sep:
69+
expected = res[1:]
70+
else:
71+
expected = res
72+
self.assertCountEqual([os.path.join(self.tempdir, x) for x in res2],
73+
expected)
74+
self.assertCountEqual(glob.iglob(pattern, **kwargs), res2)
75+
bpattern = os.fsencode(pattern)
76+
bres2 = [os.fsencode(x) for x in res2]
77+
self.assertCountEqual(glob.glob(bpattern, **kwargs), bres2)
78+
self.assertCountEqual(glob.iglob(bpattern, **kwargs), bres2)
79+
80+
self.assertCountEqual(glob.glob(pattern, root_dir=self.tempdir, **kwargs), res2)
81+
self.assertCountEqual(glob.iglob(pattern, root_dir=self.tempdir, **kwargs), res2)
82+
btempdir = os.fsencode(self.tempdir)
83+
self.assertCountEqual(
84+
glob.glob(bpattern, root_dir=btempdir, **kwargs), bres2)
85+
self.assertCountEqual(
86+
glob.iglob(bpattern, root_dir=btempdir, **kwargs), bres2)
87+
88+
if self.dir_fd is not None:
89+
self.assertCountEqual(
90+
glob.glob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
91+
self.assertCountEqual(
92+
glob.iglob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
93+
self.assertCountEqual(
94+
glob.glob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
95+
self.assertCountEqual(
96+
glob.iglob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
97+
5698
return res
5799

58100
def assertSequencesEqual_noorder(self, l1, l2):
@@ -78,6 +120,14 @@ def test_glob_literal(self):
78120
res = glob.glob(os.path.join(os.fsencode(os.curdir), b'*'))
79121
self.assertEqual({type(r) for r in res}, {bytes})
80122

123+
def test_glob_empty_pattern(self):
124+
self.assertEqual(glob.glob(''), [])
125+
self.assertEqual(glob.glob(b''), [])
126+
self.assertEqual(glob.glob('', root_dir=self.tempdir), [])
127+
self.assertEqual(glob.glob(b'', root_dir=os.fsencode(self.tempdir)), [])
128+
self.assertEqual(glob.glob('', dir_fd=self.dir_fd), [])
129+
self.assertEqual(glob.glob(b'', dir_fd=self.dir_fd), [])
130+
81131
def test_glob_one_directory(self):
82132
eq = self.assertSequencesEqual_noorder
83133
eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa']))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the *root_dir* and *dir_fd* parameters to :func:`glob.glob` and :func:`glob.iglob`.

0 commit comments

Comments
 (0)