Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 59b6865

Browse files
author
Fredrik Lundh
committed
fixed #449964: sre.sub raises an exception if the template contains a \g<x> group reference followed by a character escape (also restructured a few things on the way to fixing #449000)
1 parent ab3b034 commit 59b6865

4 files changed

Lines changed: 30 additions & 21 deletions

File tree

Lib/sre.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,13 @@ def _subn(pattern, template, text, count=0, sub=0):
251251
else:
252252
template = _compile_repl(template, pattern)
253253
literals = template[1]
254-
sub = 0 # temporarly disabled, see bug #449000
255-
if (sub and not count and pattern._isliteral() and
256-
len(literals) == 1 and literals[0]):
257-
# shortcut: both pattern and string are literals
258-
return string.replace(text, pattern.pattern, literals[0]), 0
254+
if sub and not count:
255+
literal = pattern._getliteral()
256+
if literal and "\\" in literal:
257+
literal = None # may contain untranslated escapes
258+
if literal is not None and len(literals) == 1 and literals[0]:
259+
# shortcut: both pattern and string are literals
260+
return string.replace(text, pattern.pattern, literals[0]), 0
259261
def filter(match, template=template):
260262
return sre_parse.expand_template(template, match)
261263
n = i = 0

Lib/sre_parse.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -647,9 +647,9 @@ def literal(literal, p=p):
647647
p.append((LITERAL, literal))
648648
sep = source[:0]
649649
if type(sep) is type(""):
650-
char = chr
650+
makechar = chr
651651
else:
652-
char = unichr
652+
makechar = unichr
653653
while 1:
654654
this = s.get()
655655
if this is None:
@@ -693,14 +693,14 @@ def literal(literal, p=p):
693693
break
694694
if not code:
695695
this = this[1:]
696-
code = LITERAL, char(atoi(this[-6:], 8) & 0xff)
696+
code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff)
697697
if code[0] is LITERAL:
698698
literal(code[1])
699699
else:
700700
a(code)
701701
else:
702702
try:
703-
this = char(ESCAPES[this][1])
703+
this = makechar(ESCAPES[this][1])
704704
except KeyError:
705705
pass
706706
literal(this)

Lib/test/test_sre.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ def bump_num(matchobj):
104104
test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
105105
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')
106106

107+
# bug 449964: fails for group followed by other escape
108+
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx')""", 'xx\bxx\b')
109+
107110
test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
108111
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
109112
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

Modules/_sre.c

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
* 2001-04-28 fl added __copy__ methods (work in progress)
3232
* 2001-05-14 fl fixes for 1.5.2
3333
* 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
34-
* 2001-09-18 fl
34+
* 2001-09-18 fl added _getliteral helper
3535
*
3636
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
3737
*
@@ -1959,25 +1959,29 @@ pattern_deepcopy(PatternObject* self, PyObject* args)
19591959
}
19601960

19611961
static PyObject*
1962-
pattern_isliteral(PatternObject* self, PyObject* args)
1962+
pattern_getliteral(PatternObject* self, PyObject* args)
19631963
{
1964-
/* internal: return true if pattern consists of literal text only */
1964+
/* internal: if the pattern is a literal string, return that
1965+
string. otherwise, return None */
19651966

19661967
SRE_CODE* code;
1967-
PyObject* isliteral;
1968+
PyObject* literal;
19681969

1969-
if (!PyArg_ParseTuple(args, ":_isliteral"))
1970+
if (!PyArg_ParseTuple(args, ":_getliteral"))
19701971
return NULL;
19711972

19721973
code = PatternObject_GetCode(self);
19731974

1974-
if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL)
1975-
isliteral = Py_True;
1976-
else
1977-
isliteral = Py_False;
1975+
if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) {
1976+
/* FIXME: extract literal string from code buffer. we can't
1977+
use the pattern member, since it may contain untranslated
1978+
escape codes (see SF bug 449000) */
1979+
literal = Py_None;
1980+
} else
1981+
literal = Py_None; /* no literal */
19781982

1979-
Py_INCREF(isliteral);
1980-
return isliteral;
1983+
Py_INCREF(literal);
1984+
return literal;
19811985
}
19821986

19831987
static PyMethodDef pattern_methods[] = {
@@ -1990,7 +1994,7 @@ static PyMethodDef pattern_methods[] = {
19901994
{"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
19911995
{"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
19921996
{"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
1993-
{"_isliteral", (PyCFunction) pattern_isliteral, METH_VARARGS},
1997+
{"_getliteral", (PyCFunction) pattern_getliteral, METH_VARARGS},
19941998
{NULL, NULL}
19951999
};
19962000

0 commit comments

Comments
 (0)