Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a54aae0

Browse files
Issue #23622: Unknown escapes in regular expressions that consist of '\'
and an ASCII letter now raise a deprecation warning and will be forbidden in Python 3.6.
1 parent 793c14e commit a54aae0

6 files changed

Lines changed: 60 additions & 19 deletions

File tree

Doc/howto/regex.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ Empty matches are replaced only when they're not adjacent to a previous match.
11381138

11391139
If *replacement* is a string, any backslash escapes in it are processed. That
11401140
is, ``\n`` is converted to a single newline character, ``\r`` is converted to a
1141-
carriage return, and so forth. Unknown escapes such as ``\j`` are left alone.
1141+
carriage return, and so forth. Unknown escapes such as ``\&`` are left alone.
11421142
Backreferences, such as ``\6``, are replaced with the substring matched by the
11431143
corresponding group in the RE. This lets you incorporate portions of the
11441144
original text in the resulting replacement string.

Doc/library/re.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,10 @@ three digits in length.
438438
.. versionchanged:: 3.3
439439
The ``'\u'`` and ``'\U'`` escape sequences have been added.
440440

441+
.. deprecated-removed:: 3.5 3.6
442+
Unknown escapes consisting of ``'\'`` and an ASCII letter now raise a
443+
deprecation warning and will be forbidden in Python 3.6.
444+
441445

442446
.. seealso::
443447

@@ -687,7 +691,7 @@ form.
687691
*string* is returned unchanged. *repl* can be a string or a function; if it is
688692
a string, any backslash escapes in it are processed. That is, ``\n`` is
689693
converted to a single newline character, ``\r`` is converted to a carriage return, and
690-
so forth. Unknown escapes such as ``\j`` are left alone. Backreferences, such
694+
so forth. Unknown escapes such as ``\&`` are left alone. Backreferences, such
691695
as ``\6``, are replaced with the substring matched by group 6 in the pattern.
692696
For example:
693697

@@ -732,6 +736,10 @@ form.
732736
.. versionchanged:: 3.5
733737
Unmatched groups are replaced with an empty string.
734738

739+
.. deprecated-removed:: 3.5 3.6
740+
Unknown escapes consisting of ``'\'`` and an ASCII letter now raise a
741+
deprecation warning and will be forbidden in Python 3.6.
742+
735743

736744
.. function:: subn(pattern, repl, string, count=0, flags=0)
737745

Lib/sre_parse.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
OCTDIGITS = frozenset("01234567")
2323
HEXDIGITS = frozenset("0123456789abcdefABCDEF")
24+
ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
2425

2526
WHITESPACE = frozenset(" \t\n\r\v\f")
2627

@@ -344,6 +345,10 @@ def _class_escape(source, escape):
344345
elif c in DIGITS:
345346
raise ValueError
346347
if len(escape) == 2:
348+
if c in ASCIILETTERS:
349+
import warnings
350+
warnings.warn('bad escape %s' % escape,
351+
DeprecationWarning, stacklevel=8)
347352
return LITERAL, ord(escape[1])
348353
except ValueError:
349354
pass
@@ -407,6 +412,10 @@ def _escape(source, escape, state):
407412
return GROUPREF, group
408413
raise ValueError
409414
if len(escape) == 2:
415+
if c in ASCIILETTERS:
416+
import warnings
417+
warnings.warn('bad escape %s' % escape,
418+
DeprecationWarning, stacklevel=8)
410419
return LITERAL, ord(escape[1])
411420
except ValueError:
412421
pass
@@ -903,7 +912,10 @@ def addgroup(index):
903912
try:
904913
this = chr(ESCAPES[this][1])
905914
except KeyError:
906-
pass
915+
if c in ASCIILETTERS:
916+
import warnings
917+
warnings.warn('bad escape %s' % this,
918+
DeprecationWarning, stacklevel=5)
907919
lappend(this)
908920
else:
909921
lappend(this)

Lib/test/re_tests.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
(r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
8888
# NOTE: not an error under PCRE/PRE:
8989
(r'\u', '', SYNTAX_ERROR), # A Perl escape
90-
(r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
90+
# (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
9191
(r'\xff', '\377', SUCCEED, 'found', chr(255)),
9292
# new \x semantics
9393
(r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
@@ -607,8 +607,8 @@
607607
# new \x semantics
608608
(r'\x00ff', '\377', FAIL),
609609
# (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
610-
(r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
611-
('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
610+
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
611+
('\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
612612
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
613613
(r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
614614

Lib/test/test_re.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,14 @@ def test_basic_re_sub(self):
100100
self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
101101
self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
102102

103-
self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
104-
'\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
105-
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
106-
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
107-
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
103+
self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
104+
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
105+
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
106+
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
107+
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
108+
with self.subTest(c):
109+
with self.assertWarns(DeprecationWarning):
110+
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
108111

109112
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
110113

@@ -551,14 +554,23 @@ def test_other_escapes(self):
551554
self.assertEqual(re.match(r"\(", '(').group(), '(')
552555
self.assertIsNone(re.match(r"\(", ')'))
553556
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
554-
self.assertEqual(re.match(r"\y", 'y').group(), 'y')
555-
self.assertIsNone(re.match(r"\y", 'z'))
556557
self.assertEqual(re.match(r"[\]]", ']').group(), ']')
557558
self.assertIsNone(re.match(r"[\]]", '['))
558559
self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
559560
self.assertIsNone(re.match(r"[a\-c]", 'b'))
560561
self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
561562
self.assertIsNone(re.match(r"[\^a]+", 'b'))
563+
re.purge() # for warnings
564+
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
565+
with self.subTest(c):
566+
with self.assertWarns(DeprecationWarning):
567+
self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
568+
self.assertIsNone(re.match('\\%c' % c, 'a'))
569+
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
570+
with self.subTest(c):
571+
with self.assertWarns(DeprecationWarning):
572+
self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
573+
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
562574

563575
def test_string_boundaries(self):
564576
# See http://bugs.python.org/issue10713
@@ -907,8 +919,10 @@ def test_sre_byte_literals(self):
907919
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
908920
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
909921
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
910-
self.assertTrue(re.match(br"\u", b'u'))
911-
self.assertTrue(re.match(br"\U", b'U'))
922+
with self.assertWarns(DeprecationWarning):
923+
self.assertTrue(re.match(br"\u1234", b'u1234'))
924+
with self.assertWarns(DeprecationWarning):
925+
self.assertTrue(re.match(br"\U00012345", b'U00012345'))
912926
self.assertTrue(re.match(br"\0", b"\000"))
913927
self.assertTrue(re.match(br"\08", b"\0008"))
914928
self.assertTrue(re.match(br"\01", b"\001"))
@@ -928,8 +942,10 @@ def test_sre_byte_class_literals(self):
928942
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
929943
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
930944
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
931-
self.assertTrue(re.match(br"[\u]", b'u'))
932-
self.assertTrue(re.match(br"[\U]", b'U'))
945+
with self.assertWarns(DeprecationWarning):
946+
self.assertTrue(re.match(br"[\u1234]", b'u'))
947+
with self.assertWarns(DeprecationWarning):
948+
self.assertTrue(re.match(br"[\U00012345]", b'U'))
933949
self.assertRaises(re.error, re.match, br"[\567]", b"")
934950
self.assertRaises(re.error, re.match, br"[\911]", b"")
935951
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
@@ -1304,8 +1320,9 @@ def test_compile(self):
13041320
def test_bug_13899(self):
13051321
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
13061322
# nothing. Ditto B and Z.
1307-
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1308-
['A', 'B', '\b', 'C', 'Z'])
1323+
with self.assertWarns(DeprecationWarning):
1324+
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1325+
['A', 'B', '\b', 'C', 'Z'])
13091326

13101327
@bigmemtest(size=_2G, memuse=1)
13111328
def test_large_search(self, size):

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ Core and Builtins
3030
Library
3131
-------
3232

33+
- Issue #23622: Unknown escapes in regular expressions that consist of ``'\'``
34+
and an ASCII letter now raise a deprecation warning and will be forbidden in
35+
Python 3.6.
36+
3337
- Issue #23671: string.Template now allows specifying the "self" parameter as a
3438
keyword argument. string.Formatter now allows specifying the "self" and
3539
the "format_string" parameters as keyword arguments.

0 commit comments

Comments
 (0)