Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c563caf

Browse files
Issue #22362: Ambiguous octal escapes outside the range 0-0o377 are now forbidden in
regular expressions.
1 parent d8644db commit c563caf

3 files changed

Lines changed: 26 additions & 9 deletions

File tree

Lib/sre_parse.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,11 @@ def _class_escape(source, escape):
295295
elif c in OCTDIGITS:
296296
# octal escape (up to three digits)
297297
escape += source.getwhile(2, OCTDIGITS)
298-
return LITERAL, int(escape[1:], 8) & 0xff
298+
c = int(escape[1:], 8)
299+
if c > 0o377:
300+
raise error('octal escape value %r outside of '
301+
'range 0-0o377' % escape)
302+
return LITERAL, c
299303
elif c in DIGITS:
300304
raise ValueError
301305
if len(escape) == 2:
@@ -337,7 +341,7 @@ def _escape(source, escape, state):
337341
elif c == "0":
338342
# octal escape
339343
escape += source.getwhile(2, OCTDIGITS)
340-
return LITERAL, int(escape[1:], 8) & 0xff
344+
return LITERAL, int(escape[1:], 8)
341345
elif c in DIGITS:
342346
# octal escape *or* decimal group reference (sigh)
343347
if source.next in DIGITS:
@@ -346,7 +350,11 @@ def _escape(source, escape, state):
346350
source.next in OCTDIGITS):
347351
# got three octal digits; this is an octal escape
348352
escape = escape + source.get()
349-
return LITERAL, int(escape[1:], 8) & 0xff
353+
c = int(escape[1:], 8)
354+
if c > 0o377:
355+
raise error('octal escape value %r outside of '
356+
'range 0-0o377' % escape)
357+
return LITERAL, c
350358
# not an octal escape, so this is a group reference
351359
group = int(escape[1:])
352360
if group < state.groups:
@@ -837,7 +845,11 @@ def addgroup(index):
837845
s.next in OCTDIGITS):
838846
this += sget()
839847
isoctal = True
840-
lappend(chr(int(this[1:], 8) & 0xff))
848+
c = int(this[1:], 8)
849+
if c > 0o377:
850+
raise error('octal escape value %r outside of '
851+
'range 0-0o377' % this)
852+
lappend(chr(c))
841853
if not isoctal:
842854
addgroup(int(this[1:]))
843855
else:

Lib/test/test_re.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,8 @@ def test_sub_template_numeric_escape(self):
154154
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
155155
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
156156

157-
self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
158-
self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
157+
self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
158+
self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
159159

160160
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
161161
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
@@ -700,7 +700,7 @@ def test_sre_character_literals(self):
700700
self.assertTrue(re.match(r"\08", "\0008"))
701701
self.assertTrue(re.match(r"\01", "\001"))
702702
self.assertTrue(re.match(r"\018", "\0018"))
703-
self.assertTrue(re.match(r"\567", chr(0o167)))
703+
self.assertRaises(re.error, re.match, r"\567", "")
704704
self.assertRaises(re.error, re.match, r"\911", "")
705705
self.assertRaises(re.error, re.match, r"\x1", "")
706706
self.assertRaises(re.error, re.match, r"\x1z", "")
@@ -728,12 +728,13 @@ def test_sre_character_class_literals(self):
728728
self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
729729
self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
730730
self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
731-
self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
731+
self.assertRaises(re.error, re.match, r"[\567]", "")
732732
self.assertRaises(re.error, re.match, r"[\911]", "")
733733
self.assertRaises(re.error, re.match, r"[\x1z]", "")
734734
self.assertRaises(re.error, re.match, r"[\u123z]", "")
735735
self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
736736
self.assertRaises(re.error, re.match, r"[\U00110000]", "")
737+
self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
737738

738739
def test_sre_byte_literals(self):
739740
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
@@ -749,7 +750,7 @@ def test_sre_byte_literals(self):
749750
self.assertTrue(re.match(br"\08", b"\0008"))
750751
self.assertTrue(re.match(br"\01", b"\001"))
751752
self.assertTrue(re.match(br"\018", b"\0018"))
752-
self.assertTrue(re.match(br"\567", bytes([0o167])))
753+
self.assertRaises(re.error, re.match, br"\567", b"")
753754
self.assertRaises(re.error, re.match, br"\911", b"")
754755
self.assertRaises(re.error, re.match, br"\x1", b"")
755756
self.assertRaises(re.error, re.match, br"\x1z", b"")
@@ -766,6 +767,7 @@ def test_sre_byte_class_literals(self):
766767
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
767768
self.assertTrue(re.match(br"[\u]", b'u'))
768769
self.assertTrue(re.match(br"[\U]", b'U'))
770+
self.assertRaises(re.error, re.match, br"[\567]", b"")
769771
self.assertRaises(re.error, re.match, br"[\911]", b"")
770772
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
771773

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ Core and Builtins
137137
Library
138138
-------
139139

140+
- Issue #22362: Ambiguous octal escapes outside the range 0-0o377 are now forbidden in
141+
regular expressions.
142+
140143
- Issue #20912: Now directories added to ZIP file have correct Unix and MS-DOS
141144
directory attributes.
142145

0 commit comments

Comments
 (0)