Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a01a2ee

Browse files
committed
Applying modified version of patch #1018386, which fixes
some escaping bugs in SRE.
1 parent ab9351b commit a01a2ee

3 files changed

Lines changed: 91 additions & 43 deletions

File tree

Doc/lib/libre.tex

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,8 @@ \subsection{Regular Expression Syntax \label{re-syntax}}
387387

388388
Octal escapes are included in a limited form: If the first digit is a
389389
0, or if there are three octal digits, it is considered an octal
390-
escape. Otherwise, it is a group reference.
390+
escape. Otherwise, it is a group reference. As with string literals,
391+
octal escapes are always at most three digits in length.
391392

392393

393394
% Note the lack of a period in the section title; it causes problems

Lib/sre_parse.py

Lines changed: 36 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -217,21 +217,11 @@ def isname(name):
217217
# check that group name is a valid string
218218
if not isident(name[0]):
219219
return False
220-
for char in name:
220+
for char in name[1:]:
221221
if not isident(char) and not isdigit(char):
222222
return False
223223
return True
224224

225-
def _group(escape, groups):
226-
# check if the escape string represents a valid group
227-
try:
228-
gid = int(escape[1:])
229-
if gid and gid < groups:
230-
return gid
231-
except ValueError:
232-
pass
233-
return None # not a valid group
234-
235225
def _class_escape(source, escape):
236226
# handle escape code inside character class
237227
code = ESCAPES.get(escape)
@@ -241,20 +231,23 @@ def _class_escape(source, escape):
241231
if code:
242232
return code
243233
try:
244-
if escape[1:2] == "x":
234+
c = escape[1:2]
235+
if c == "x":
245236
# hexadecimal escape (exactly two digits)
246237
while source.next in HEXDIGITS and len(escape) < 4:
247238
escape = escape + source.get()
248239
escape = escape[2:]
249240
if len(escape) != 2:
250241
raise error, "bogus escape: %s" % repr("\\" + escape)
251242
return LITERAL, int(escape, 16) & 0xff
252-
elif escape[1:2] in OCTDIGITS:
243+
elif c in OCTDIGITS:
253244
# octal escape (up to three digits)
254-
while source.next in OCTDIGITS and len(escape) < 5:
245+
while source.next in OCTDIGITS and len(escape) < 4:
255246
escape = escape + source.get()
256247
escape = escape[1:]
257248
return LITERAL, int(escape, 8) & 0xff
249+
elif c in DIGITS:
250+
raise error, "bogus escape: %s" % repr(escape)
258251
if len(escape) == 2:
259252
return LITERAL, ord(escape[1])
260253
except ValueError:
@@ -270,19 +263,20 @@ def _escape(source, escape, state):
270263
if code:
271264
return code
272265
try:
273-
if escape[1:2] == "x":
266+
c = escape[1:2]
267+
if c == "x":
274268
# hexadecimal escape
275269
while source.next in HEXDIGITS and len(escape) < 4:
276270
escape = escape + source.get()
277271
if len(escape) != 4:
278272
raise ValueError
279273
return LITERAL, int(escape[2:], 16) & 0xff
280-
elif escape[1:2] == "0":
274+
elif c == "0":
281275
# octal escape
282276
while source.next in OCTDIGITS and len(escape) < 4:
283277
escape = escape + source.get()
284278
return LITERAL, int(escape[1:], 8) & 0xff
285-
elif escape[1:2] in DIGITS:
279+
elif c in DIGITS:
286280
# octal escape *or* decimal group reference (sigh)
287281
if source.next in DIGITS:
288282
escape = escape + source.get()
@@ -291,9 +285,9 @@ def _escape(source, escape, state):
291285
# got three octal digits; this is an octal escape
292286
escape = escape + source.get()
293287
return LITERAL, int(escape[1:], 8) & 0xff
294-
# got at least one decimal digit; this is a group reference
295-
group = _group(escape, state.groups)
296-
if group:
288+
# not an octal escape, so this is a group reference
289+
group = int(escape[1:])
290+
if group < state.groups:
297291
if not state.checkgroup(group):
298292
raise error, "cannot refer to open group"
299293
return GROUPREF, group
@@ -709,7 +703,8 @@ def literal(literal, p=p, pappend=a):
709703
break # end of replacement string
710704
if this and this[0] == "\\":
711705
# group
712-
if this == "\\g":
706+
c = this[1:2]
707+
if c == "g":
713708
name = ""
714709
if s.match("<"):
715710
while 1:
@@ -723,6 +718,8 @@ def literal(literal, p=p, pappend=a):
723718
raise error, "bad group name"
724719
try:
725720
index = int(name)
721+
if index < 0:
722+
raise error, "negative group number"
726723
except ValueError:
727724
if not isname(name):
728725
raise error, "bad character in group name"
@@ -731,26 +728,23 @@ def literal(literal, p=p, pappend=a):
731728
except KeyError:
732729
raise IndexError, "unknown group name"
733730
a((MARK, index))
734-
elif len(this) > 1 and this[1] in DIGITS:
735-
code = None
736-
while 1:
737-
group = _group(this, pattern.groups+1)
738-
if group:
739-
if (s.next not in DIGITS or
740-
not _group(this + s.next, pattern.groups+1)):
741-
code = MARK, group
742-
break
743-
elif s.next in OCTDIGITS:
731+
elif c == "0":
732+
if s.next in OCTDIGITS:
733+
this = this + sget()
734+
if s.next in OCTDIGITS:
744735
this = this + sget()
745-
else:
746-
break
747-
if not code:
748-
this = this[1:]
749-
code = LITERAL, makechar(int(this[-6:], 8) & 0xff)
750-
if code[0] is LITERAL:
751-
literal(code[1])
752-
else:
753-
a(code)
736+
literal(makechar(int(this[1:], 8) & 0xff))
737+
elif c in DIGITS:
738+
isoctal = False
739+
if s.next in DIGITS:
740+
this = this + sget()
741+
if (c in OCTDIGITS and s.next in OCTDIGITS and
742+
this[2] in OCTDIGITS):
743+
this = this + sget()
744+
isoctal = True
745+
literal(makechar(int(this[1:], 8) & 0xff))
746+
if not isoctal:
747+
a((MARK, int(this[1:])))
754748
else:
755749
try:
756750
this = makechar(ESCAPES[this][1])
@@ -782,7 +776,7 @@ def expand_template(template, match):
782776
for index, group in groups:
783777
literals[index] = s = g(group)
784778
if s is None:
785-
raise IndexError
779+
raise error, "unmatched group"
786780
except IndexError:
787-
raise error, "empty group"
781+
raise error, "invalid group reference"
788782
return sep.join(literals)

Lib/test/test_re.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,48 @@ def test_bug_449000(self):
8383
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
8484
'abc\ndef\n')
8585

86+
def test_sub_template_numeric_escape(self):
87+
# bug 776311 and friends
88+
self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
89+
self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
90+
self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
91+
self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
92+
self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
93+
self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
94+
self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
95+
96+
self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
97+
self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
98+
99+
self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
100+
self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
101+
self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
102+
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
103+
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
104+
105+
self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
106+
self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
107+
108+
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
109+
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
110+
self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
111+
self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
112+
self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
113+
self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
114+
self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
115+
self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
116+
self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
117+
self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
118+
self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
119+
self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
120+
121+
# in python2.3 (etc), these loop endlessly in sre_parse.py
122+
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
123+
self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
124+
'xz8')
125+
self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
126+
'xza')
127+
86128
def test_qualified_re_sub(self):
87129
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
88130
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
@@ -105,6 +147,7 @@ def test_symbolic_refs(self):
105147
self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
106148
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
107149
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
150+
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
108151

109152
def test_re_subn(self):
110153
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@@ -386,6 +429,16 @@ def test_sre_character_literals(self):
386429
self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
387430
self.assertRaises(re.error, re.match, "\911", "")
388431

432+
def test_sre_character_class_literals(self):
433+
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
434+
self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
435+
self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
436+
self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
437+
self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
438+
self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
439+
self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
440+
self.assertRaises(re.error, re.match, "[\911]", "")
441+
389442
def test_bug_113254(self):
390443
self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
391444
self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)

0 commit comments

Comments
 (0)