Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9bd85b8

Browse files
Issue #27030: Unknown escapes consisting of '\' and an ASCII letter in
regular expressions now are errors.
1 parent d35bf03 commit 9bd85b8

4 files changed

Lines changed: 32 additions & 84 deletions

File tree

Doc/library/re.rst

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,9 @@ The special characters are:
317317

318318

319319
The special sequences consist of ``'\'`` and a character from the list below.
320-
If the ordinary character is not on the list, then the resulting RE will match
321-
the second character. For example, ``\$`` matches the character ``'$'``.
320+
If the ordinary character is not an ASCII digit or an ASCII letter, then the
321+
resulting RE will match the second character. For example, ``\$`` matches the
322+
character ``'$'``.
322323

323324
``\number``
324325
Matches the contents of the group of the same number. Groups are numbered
@@ -438,9 +439,8 @@ three digits in length.
438439
.. versionchanged:: 3.3
439440
The ``'\u'`` and ``'\U'`` escape sequences have been added.
440441

441-
.. deprecated-removed:: 3.5 3.6
442-
Unknown escapes consist of ``'\'`` and ASCII letter now raise a
443-
deprecation warning and will be forbidden in Python 3.6.
442+
.. versionchanged:: 3.6
443+
Unknown escapes consisting of ``'\'`` and an ASCII letter now are errors.
444444

445445

446446
.. seealso::
@@ -528,11 +528,11 @@ form.
528528
current locale. The use of this flag is discouraged as the locale mechanism
529529
is very unreliable, and it only handles one "culture" at a time anyway;
530530
you should use Unicode matching instead, which is the default in Python 3
531-
for Unicode (str) patterns. This flag makes sense only with bytes patterns.
531+
for Unicode (str) patterns. This flag can be used only with bytes patterns.
532532

533-
.. deprecated-removed:: 3.5 3.6
534-
Deprecated the use of :const:`re.LOCALE` with string patterns or
535-
:const:`re.ASCII`.
533+
.. versionchanged:: 3.6
534+
:const:`re.LOCALE` can be used only with bytes patterns and is
535+
not compatible with :const:`re.ASCII`.
536536

537537

538538
.. data:: M
@@ -738,9 +738,8 @@ form.
738738
.. versionchanged:: 3.5
739739
Unmatched groups are replaced with an empty string.
740740

741-
.. deprecated-removed:: 3.5 3.6
742-
Unknown escapes consist of ``'\'`` and ASCII letter now raise a
743-
deprecation warning and will be forbidden in Python 3.6.
741+
.. versionchanged:: 3.6
742+
Unknown escapes consisting of ``'\'`` and an ASCII letter now are errors.
744743

745744

746745
.. function:: subn(pattern, repl, string, count=0, flags=0)

Lib/sre_parse.py

Lines changed: 5 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -282,33 +282,6 @@ def seek(self, index):
282282
def error(self, msg, offset=0):
283283
return error(msg, self.string, self.tell() - offset)
284284

285-
# The following three functions are not used in this module anymore, but we keep
286-
# them here (with DeprecationWarnings) for backwards compatibility.
287-
288-
def isident(char):
289-
import warnings
290-
warnings.warn('sre_parse.isident() will be removed in 3.5',
291-
DeprecationWarning, stacklevel=2)
292-
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
293-
294-
def isdigit(char):
295-
import warnings
296-
warnings.warn('sre_parse.isdigit() will be removed in 3.5',
297-
DeprecationWarning, stacklevel=2)
298-
return "0" <= char <= "9"
299-
300-
def isname(name):
301-
import warnings
302-
warnings.warn('sre_parse.isname() will be removed in 3.5',
303-
DeprecationWarning, stacklevel=2)
304-
# check that group name is a valid string
305-
if not isident(name[0]):
306-
return False
307-
for char in name[1:]:
308-
if not isident(char) and not isdigit(char):
309-
return False
310-
return True
311-
312285
def _class_escape(source, escape):
313286
# handle escape code inside character class
314287
code = ESCAPES.get(escape)
@@ -351,9 +324,7 @@ def _class_escape(source, escape):
351324
raise ValueError
352325
if len(escape) == 2:
353326
if c in ASCIILETTERS:
354-
import warnings
355-
warnings.warn('bad escape %s' % escape,
356-
DeprecationWarning, stacklevel=8)
327+
raise source.error('bad escape %s' % escape, len(escape))
357328
return LITERAL, ord(escape[1])
358329
except ValueError:
359330
pass
@@ -418,9 +389,7 @@ def _escape(source, escape, state):
418389
raise source.error("invalid group reference", len(escape))
419390
if len(escape) == 2:
420391
if c in ASCIILETTERS:
421-
import warnings
422-
warnings.warn('bad escape %s' % escape,
423-
DeprecationWarning, stacklevel=8)
392+
raise source.error("bad escape %s" % escape, len(escape))
424393
return LITERAL, ord(escape[1])
425394
except ValueError:
426395
pass
@@ -798,10 +767,7 @@ def fix_flags(src, flags):
798767
# Check and fix flags according to the type of pattern (str or bytes)
799768
if isinstance(src, str):
800769
if flags & SRE_FLAG_LOCALE:
801-
import warnings
802-
warnings.warn("LOCALE flag with a str pattern is deprecated. "
803-
"Will be an error in 3.6",
804-
DeprecationWarning, stacklevel=6)
770+
raise ValueError("cannot use LOCALE flag with a str pattern")
805771
if not flags & SRE_FLAG_ASCII:
806772
flags |= SRE_FLAG_UNICODE
807773
elif flags & SRE_FLAG_UNICODE:
@@ -810,10 +776,7 @@ def fix_flags(src, flags):
810776
if flags & SRE_FLAG_UNICODE:
811777
raise ValueError("cannot use UNICODE flag with a bytes pattern")
812778
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
813-
import warnings
814-
warnings.warn("ASCII and LOCALE flags are incompatible. "
815-
"Will be an error in 3.6",
816-
DeprecationWarning, stacklevel=6)
779+
raise ValueError("ASCII and LOCALE flags are incompatible")
817780
return flags
818781

819782
def parse(str, flags=0, pattern=None):
@@ -914,9 +877,7 @@ def addgroup(index):
914877
this = chr(ESCAPES[this][1])
915878
except KeyError:
916879
if c in ASCIILETTERS:
917-
import warnings
918-
warnings.warn('bad escape %s' % this,
919-
DeprecationWarning, stacklevel=4)
880+
raise s.error('bad escape %s' % this, len(this))
920881
lappend(this)
921882
else:
922883
lappend(this)

Lib/test/test_re.py

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def test_basic_re_sub(self):
124124
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
125125
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
126126
with self.subTest(c):
127-
with self.assertWarns(DeprecationWarning):
127+
with self.assertRaises(re.error):
128128
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
129129

130130
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
@@ -633,14 +633,10 @@ def test_other_escapes(self):
633633
re.purge() # for warnings
634634
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
635635
with self.subTest(c):
636-
with self.assertWarns(DeprecationWarning):
637-
self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
638-
self.assertIsNone(re.match('\\%c' % c, 'a'))
636+
self.assertRaises(re.error, re.compile, '\\%c' % c)
639637
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
640638
with self.subTest(c):
641-
with self.assertWarns(DeprecationWarning):
642-
self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
643-
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
639+
self.assertRaises(re.error, re.compile, '[\\%c]' % c)
644640

645641
def test_string_boundaries(self):
646642
# See http://bugs.python.org/issue10713
@@ -993,10 +989,8 @@ def test_sre_byte_literals(self):
993989
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
994990
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
995991
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
996-
with self.assertWarns(DeprecationWarning):
997-
self.assertTrue(re.match(br"\u1234", b'u1234'))
998-
with self.assertWarns(DeprecationWarning):
999-
self.assertTrue(re.match(br"\U00012345", b'U00012345'))
992+
self.assertRaises(re.error, re.compile, br"\u1234")
993+
self.assertRaises(re.error, re.compile, br"\U00012345")
1000994
self.assertTrue(re.match(br"\0", b"\000"))
1001995
self.assertTrue(re.match(br"\08", b"\0008"))
1002996
self.assertTrue(re.match(br"\01", b"\001"))
@@ -1018,10 +1012,8 @@ def test_sre_byte_class_literals(self):
10181012
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
10191013
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
10201014
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
1021-
with self.assertWarns(DeprecationWarning):
1022-
self.assertTrue(re.match(br"[\u1234]", b'u'))
1023-
with self.assertWarns(DeprecationWarning):
1024-
self.assertTrue(re.match(br"[\U00012345]", b'U'))
1015+
self.assertRaises(re.error, re.compile, br"[\u1234]")
1016+
self.assertRaises(re.error, re.compile, br"[\U00012345]")
10251017
self.checkPatternError(br"[\567]",
10261018
r'octal escape value \567 outside of '
10271019
r'range 0-0o377', 1)
@@ -1363,12 +1355,12 @@ def test_locale_flag(self):
13631355
if bletter:
13641356
self.assertIsNone(pat.match(bletter))
13651357
# Incompatibilities
1366-
self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
1367-
self.assertWarns(DeprecationWarning, re.compile, '(?L)')
1368-
self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
1369-
self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
1370-
self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
1371-
self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
1358+
self.assertRaises(ValueError, re.compile, '', re.LOCALE)
1359+
self.assertRaises(ValueError, re.compile, '(?L)')
1360+
self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
1361+
self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
1362+
self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
1363+
self.assertRaises(ValueError, re.compile, b'(?aL)')
13721364

13731365
def test_bug_6509(self):
13741366
# Replacement strings of both types must parse properly.
@@ -1419,13 +1411,6 @@ def test_compile(self):
14191411
# Test behaviour when not given a string or pattern as parameter
14201412
self.assertRaises(TypeError, re.compile, 0)
14211413

1422-
def test_bug_13899(self):
1423-
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
1424-
# nothing. Ditto B and Z.
1425-
with self.assertWarns(DeprecationWarning):
1426-
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1427-
['A', 'B', '\b', 'C', 'Z'])
1428-
14291414
@bigmemtest(size=_2G, memuse=1)
14301415
def test_large_search(self, size):
14311416
# Issue #10182: indices were 32-bit-truncated.

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ Core and Builtins
3838
Library
3939
-------
4040

41+
- Issue #27030: Unknown escapes consisting of ``'\'`` and an ASCII letter in
42+
regular expressions now are errors.
43+
4144
- Issue #27186: Add os.PathLike support to DirEntry (part of PEP 519).
4245
Initial patch by Jelle Zijlstra.
4346

0 commit comments

Comments
 (0)