Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3d4fe93

Browse files
CPython Developers and youknowone
authored and committed
Update fnmatch from CPython 3.10
1 parent 2154d12 commit 3d4fe93

File tree

2 files changed

+236
-19
lines changed

2 files changed

+236
-19
lines changed

Lib/fnmatch.py

Lines changed: 86 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,19 @@
99
The function translate(PATTERN) returns a regular expression
1010
corresponding to PATTERN. (It does not compile it.)
1111
"""
12-
try:
13-
import os
14-
except ImportError:
15-
import _dummy_os as os
12+
import os
1613
import posixpath
1714
import re
1815
import functools
1916

2017
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
2118

19+
# Build a thread-safe incrementing counter to help create unique regexp group
20+
# names across calls.
21+
from itertools import count
22+
_nextgroupnum = count().__next__
23+
del count
24+
2225
def fnmatch(name, pat):
2326
"""Test whether FILENAME matches PATTERN.
2427
@@ -49,7 +52,7 @@ def _compile_pattern(pat):
4952
return re.compile(res).match
5053

5154
def filter(names, pat):
52-
"""Return the subset of the list NAMES that match PAT."""
55+
"""Construct a list from those elements of the iterable NAMES that match PAT."""
5356
result = []
5457
pat = os.path.normcase(pat)
5558
match = _compile_pattern(pat)
@@ -80,15 +83,19 @@ def translate(pat):
8083
There is no way to quote meta-characters.
8184
"""
8285

86+
STAR = object()
87+
res = []
88+
add = res.append
8389
i, n = 0, len(pat)
84-
res = ''
8590
while i < n:
8691
c = pat[i]
8792
i = i+1
8893
if c == '*':
89-
res = res + '.*'
94+
# compress consecutive `*` into one
95+
if (not res) or res[-1] is not STAR:
96+
add(STAR)
9097
elif c == '?':
91-
res = res + '.'
98+
add('.')
9299
elif c == '[':
93100
j = i
94101
if j < n and pat[j] == '!':
@@ -98,10 +105,10 @@ def translate(pat):
98105
while j < n and pat[j] != ']':
99106
j = j+1
100107
if j >= n:
101-
res = res + '\\['
108+
add('\\[')
102109
else:
103110
stuff = pat[i:j]
104-
if '--' not in stuff:
111+
if '-' not in stuff:
105112
stuff = stuff.replace('\\', r'\\')
106113
else:
107114
chunks = []
@@ -113,19 +120,80 @@ def translate(pat):
113120
chunks.append(pat[i:k])
114121
i = k+1
115122
k = k+3
116-
chunks.append(pat[i:j])
123+
chunk = pat[i:j]
124+
if chunk:
125+
chunks.append(chunk)
126+
else:
127+
chunks[-1] += '-'
128+
# Remove empty ranges -- invalid in RE.
129+
for k in range(len(chunks)-1, 0, -1):
130+
if chunks[k-1][-1] > chunks[k][0]:
131+
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
132+
del chunks[k]
117133
# Escape backslashes and hyphens for set difference (--).
118134
# Hyphens that create ranges shouldn't be escaped.
119135
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
120136
for s in chunks)
121137
# Escape set operations (&&, ~~ and ||).
122138
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
123139
i = j+1
124-
if stuff[0] == '!':
125-
stuff = '^' + stuff[1:]
126-
elif stuff[0] in ('^', '['):
127-
stuff = '\\' + stuff
128-
res = '%s[%s]' % (res, stuff)
140+
if not stuff:
141+
# Empty range: never match.
142+
add('(?!)')
143+
elif stuff == '!':
144+
# Negated empty range: match any character.
145+
add('.')
146+
else:
147+
if stuff[0] == '!':
148+
stuff = '^' + stuff[1:]
149+
elif stuff[0] in ('^', '['):
150+
stuff = '\\' + stuff
151+
add(f'[{stuff}]')
152+
else:
153+
add(re.escape(c))
154+
assert i == n
155+
156+
# Deal with STARs.
157+
inp = res
158+
res = []
159+
add = res.append
160+
i, n = 0, len(inp)
161+
# Fixed pieces at the start?
162+
while i < n and inp[i] is not STAR:
163+
add(inp[i])
164+
i += 1
165+
# Now deal with STAR fixed STAR fixed ...
166+
# For an interior `STAR fixed` pairing, we want to do a minimal
167+
# .*? match followed by `fixed`, with no possibility of backtracking.
168+
# We can't spell that directly, but can trick it into working by matching
169+
# .*?fixed
170+
# in a lookahead assertion, save the matched part in a group, then
171+
# consume that group via a backreference. If the overall match fails,
172+
# the lookahead assertion won't try alternatives. So the translation is:
173+
# (?=(?P<name>.*?fixed))(?P=name)
174+
# Group names are created as needed: g0, g1, g2, ...
175+
# The numbers are obtained from _nextgroupnum() to ensure they're unique
176+
# across calls and across threads. This is because people rely on the
177+
# undocumented ability to join multiple translate() results together via
178+
# "|" to build large regexps matching "one of many" shell patterns.
179+
while i < n:
180+
assert inp[i] is STAR
181+
i += 1
182+
if i == n:
183+
add(".*")
184+
break
185+
assert inp[i] is not STAR
186+
fixed = []
187+
while i < n and inp[i] is not STAR:
188+
fixed.append(inp[i])
189+
i += 1
190+
fixed = "".join(fixed)
191+
if i == n:
192+
add(".*")
193+
add(fixed)
129194
else:
130-
res = res + re.escape(c)
131-
return r'(?s:%s)\Z' % res
195+
groupnum = _nextgroupnum()
196+
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
197+
assert i == n
198+
res = "".join(res)
199+
return fr'(?s:{res})\Z'

Lib/test/test_fnmatch.py

Lines changed: 150 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import unittest
44
import os
5+
import string
56
import warnings
67

78
from fnmatch import fnmatch, fnmatchcase, translate, filter
@@ -45,6 +46,13 @@ def test_fnmatch(self):
4546
check('\nfoo', 'foo*', False)
4647
check('\n', '*')
4748

49+
def test_slow_fnmatch(self):
50+
check = self.check_match
51+
check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
52+
# The next "takes forever" if the regexp translation is
53+
# straightforward. See bpo-40480.
54+
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
55+
4856
def test_mix_bytes_str(self):
4957
self.assertRaises(TypeError, fnmatch, 'test', b'*')
5058
self.assertRaises(TypeError, fnmatch, b'test', '*')
@@ -89,6 +97,119 @@ def test_sep(self):
8997
check('usr/bin', 'usr\\bin', normsep)
9098
check('usr\\bin', 'usr\\bin')
9199

100+
def test_char_set(self):
101+
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
102+
check = self.check_match
103+
tescases = string.ascii_lowercase + string.digits + string.punctuation
104+
for c in tescases:
105+
check(c, '[az]', c in 'az')
106+
check(c, '[!az]', c not in 'az')
107+
# Case insensitive.
108+
for c in tescases:
109+
check(c, '[AZ]', (c in 'az') and ignorecase)
110+
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
111+
for c in string.ascii_uppercase:
112+
check(c, '[az]', (c in 'AZ') and ignorecase)
113+
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
114+
# Repeated same character.
115+
for c in tescases:
116+
check(c, '[aa]', c == 'a')
117+
# Special cases.
118+
for c in tescases:
119+
check(c, '[^az]', c in '^az')
120+
check(c, '[[az]', c in '[az')
121+
check(c, r'[!]]', c != ']')
122+
check('[', '[')
123+
check('[]', '[]')
124+
check('[!', '[!')
125+
check('[!]', '[!]')
126+
127+
def test_range(self):
128+
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
129+
normsep = os.path.normcase('\\') == os.path.normcase('/')
130+
check = self.check_match
131+
tescases = string.ascii_lowercase + string.digits + string.punctuation
132+
for c in tescases:
133+
check(c, '[b-d]', c in 'bcd')
134+
check(c, '[!b-d]', c not in 'bcd')
135+
check(c, '[b-dx-z]', c in 'bcdxyz')
136+
check(c, '[!b-dx-z]', c not in 'bcdxyz')
137+
# Case insensitive.
138+
for c in tescases:
139+
check(c, '[B-D]', (c in 'bcd') and ignorecase)
140+
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
141+
for c in string.ascii_uppercase:
142+
check(c, '[b-d]', (c in 'BCD') and ignorecase)
143+
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
144+
# Upper bound == lower bound.
145+
for c in tescases:
146+
check(c, '[b-b]', c == 'b')
147+
# Special cases.
148+
for c in tescases:
149+
check(c, '[!-#]', c not in '-#')
150+
check(c, '[!--.]', c not in '-.')
151+
check(c, '[^-`]', c in '^_`')
152+
if not (normsep and c == '/'):
153+
check(c, '[[-^]', c in r'[\]^')
154+
check(c, r'[\-^]', c in r'\]^')
155+
check(c, '[b-]', c in '-b')
156+
check(c, '[!b-]', c not in '-b')
157+
check(c, '[-b]', c in '-b')
158+
check(c, '[!-b]', c not in '-b')
159+
check(c, '[-]', c in '-')
160+
check(c, '[!-]', c not in '-')
161+
# Upper bound is less than lower bound: error in RE.
162+
for c in tescases:
163+
check(c, '[d-b]', False)
164+
check(c, '[!d-b]', True)
165+
check(c, '[d-bx-z]', c in 'xyz')
166+
check(c, '[!d-bx-z]', c not in 'xyz')
167+
check(c, '[d-b^-`]', c in '^_`')
168+
if not (normsep and c == '/'):
169+
check(c, '[d-b[-^]', c in r'[\]^')
170+
171+
def test_sep_in_char_set(self):
172+
normsep = os.path.normcase('\\') == os.path.normcase('/')
173+
check = self.check_match
174+
check('/', r'[/]')
175+
check('\\', r'[\]')
176+
check('/', r'[\]', normsep)
177+
check('\\', r'[/]', normsep)
178+
check('[/]', r'[/]', False)
179+
check(r'[\\]', r'[/]', False)
180+
check('\\', r'[\t]')
181+
check('/', r'[\t]', normsep)
182+
check('t', r'[\t]')
183+
check('\t', r'[\t]', False)
184+
185+
def test_sep_in_range(self):
186+
normsep = os.path.normcase('\\') == os.path.normcase('/')
187+
check = self.check_match
188+
check('a/b', 'a[.-0]b', not normsep)
189+
check('a\\b', 'a[.-0]b', False)
190+
check('a\\b', 'a[Z-^]b', not normsep)
191+
check('a/b', 'a[Z-^]b', False)
192+
193+
check('a/b', 'a[/-0]b', not normsep)
194+
check(r'a\b', 'a[/-0]b', False)
195+
check('a[/-0]b', 'a[/-0]b', False)
196+
check(r'a[\-0]b', 'a[/-0]b', False)
197+
198+
check('a/b', 'a[.-/]b')
199+
check(r'a\b', 'a[.-/]b', normsep)
200+
check('a[.-/]b', 'a[.-/]b', False)
201+
check(r'a[.-\]b', 'a[.-/]b', False)
202+
203+
check(r'a\b', r'a[\-^]b')
204+
check('a/b', r'a[\-^]b', normsep)
205+
check(r'a[\-^]b', r'a[\-^]b', False)
206+
check('a[/-^]b', r'a[\-^]b', False)
207+
208+
check(r'a\b', r'a[Z-\]b', not normsep)
209+
check('a/b', r'a[Z-\]b', False)
210+
check(r'a[Z-\]b', r'a[Z-\]b', False)
211+
check('a[Z-/]b', r'a[Z-\]b', False)
212+
92213
def test_warnings(self):
93214
with warnings.catch_warnings():
94215
warnings.simplefilter('error', Warning)
@@ -104,6 +225,7 @@ def test_warnings(self):
104225
class TranslateTestCase(unittest.TestCase):
105226

106227
def test_translate(self):
228+
import re
107229
self.assertEqual(translate('*'), r'(?s:.*)\Z')
108230
self.assertEqual(translate('?'), r'(?s:.)\Z')
109231
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -112,7 +234,34 @@ def test_translate(self):
112234
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
113235
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
114236
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
115-
237+
# from the docs
238+
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
239+
# squash consecutive stars
240+
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
241+
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
242+
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
243+
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
244+
# fancy translation to prevent exponential-time match failure
245+
t = translate('**a*a****a')
246+
digits = re.findall(r'\d+', t)
247+
self.assertEqual(len(digits), 4)
248+
self.assertEqual(digits[0], digits[1])
249+
self.assertEqual(digits[2], digits[3])
250+
g1 = f"g{digits[0]}" # e.g., group name "g4"
251+
g2 = f"g{digits[2]}" # e.g., group name "g5"
252+
self.assertEqual(t,
253+
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
254+
# and try pasting multiple translate results - it's an undocumented
255+
# feature that this works; all the pain of generating unique group
256+
# names across calls exists to support this
257+
r1 = translate('**a**a**a*')
258+
r2 = translate('**b**b**b*')
259+
r3 = translate('*c*c*c*')
260+
fatre = "|".join([r1, r2, r3])
261+
self.assertTrue(re.match(fatre, 'abaccad'))
262+
self.assertTrue(re.match(fatre, 'abxbcab'))
263+
self.assertTrue(re.match(fatre, 'cbabcaxc'))
264+
self.assertFalse(re.match(fatre, 'dabccbad'))
116265

117266
class FilterTestCase(unittest.TestCase):
118267

0 commit comments

Comments
 (0)