Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 21009b9

Browse files
author
Fredrik Lundh
committed
an SRE bugfix a day keeps Guido away...
#462270: sub-tle difference between pre.sub and sre.sub. PRE ignored an empty match at the previous location, SRE didn't. also synced with Secret Labs "sreopen" codebase.
1 parent 18d8d5a commit 21009b9

3 files changed

Lines changed: 25 additions & 13 deletions

File tree

Lib/sre.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
"|" A|B, creates an RE that will match either A or B.
4646
(...) Matches the RE inside the parentheses.
4747
The contents can be retrieved or matched later in the string.
48-
(?iLmsx) Set the I, L, M, S, or X flag for the RE.
48+
(?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
4949
(?:...) Non-grouping version of regular parentheses.
5050
(?P<name>...) The substring matched by the group is accessible by name.
5151
(?P=name) Matches the text matched earlier by the group named name.
@@ -80,7 +80,6 @@
8080
findall Find all occurrences of a pattern in a string.
8181
compile Compile a pattern into a RegexObject.
8282
purge Clear the regular expression cache.
83-
template Compile a template pattern, returning a pattern object.
8483
escape Backslash all non-alphanumerics in a string.
8584
8685
Some of the functions in this module take flags as optional parameters:
@@ -90,11 +89,12 @@
9089
"$" matches the end of lines as well as the string.
9190
S DOTALL "." matches any character at all, including the newline.
9291
X VERBOSE Ignore whitespace and comments for nicer looking RE's.
93-
U UNICODE Use unicode locale.
92+
U UNICODE Make \w, \W, \b, \B dependent on the Unicode locale.
9493
9594
This module also defines an exception 'error'.
9695
9796
"""
97+
9898
import sre_compile
9999
import sre_parse
100100

@@ -104,7 +104,7 @@
104104
"U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
105105
"UNICODE", "error" ]
106106

107-
__version__ = "2.1b2"
107+
__version__ = "2.1.1"
108108

109109
# this module works under 1.5.2 and later. don't use string methods
110110
import string
@@ -269,6 +269,9 @@ def filter(match, template=template):
269269
b, e = m.span()
270270
if i < b:
271271
append(text[i:b])
272+
elif i == b == e and n:
273+
append(text[i:b])
274+
continue # ignore empty match at previous position
272275
append(filter(m))
273276
i = e
274277
n = n + 1

Lib/test/test_sre.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ def bump_num(matchobj):
123123
test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
124124
test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
125125

126+
# Test for empty sub() behaviour, see SF bug #462270
127+
test(r"""sre.sub('x*', '-', 'abxd')""", '-a-b-d-')
128+
test(r"""sre.sub('x+', '-', 'abxd')""", 'ab-d')
129+
126130
if verbose:
127131
print 'Running tests on symbolic references'
128132

Modules/_sre.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
* 2001-04-28 fl added __copy__ methods (work in progress)
3232
* 2001-05-14 fl fixes for 1.5.2
3333
* 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
34+
* 2001-09-18 fl
3435
*
3536
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
3637
*
@@ -133,6 +134,8 @@ static char copyright[] =
133134
#define SRE_ALNUM_MASK 8
134135
#define SRE_WORD_MASK 16
135136

137+
/* FIXME: this assumes ASCII. create tables in init_sre() instead */
138+
136139
static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
137140
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
138141
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
@@ -1141,6 +1144,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
11411144
}
11421145

11431146
/* can't end up here */
1147+
/* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
11441148
}
11451149

11461150
LOCAL(int)
@@ -2624,16 +2628,17 @@ init_sre(void)
26242628
m = Py_InitModule("_" SRE_MODULE, _functions);
26252629
d = PyModule_GetDict(m);
26262630

2627-
PyDict_SetItemString(
2628-
d, "MAGIC", (x = (PyObject*) PyInt_FromLong(SRE_MAGIC))
2629-
);
2630-
Py_XDECREF(x);
2631-
2632-
PyDict_SetItemString(
2633-
d, "copyright", (x = (PyObject*)PyString_FromString(copyright))
2634-
);
2635-
Py_XDECREF(x);
2631+
x = PyInt_FromLong(SRE_MAGIC);
2632+
if (x) {
2633+
PyDict_SetItemString(d, "MAGIC", x);
2634+
Py_DECREF(x);
2635+
}
26362636

2637+
x = PyString_FromString(copyright);
2638+
if (x) {
2639+
PyDict_SetItemString(d, "copyright", x);
2640+
Py_DECREF(x);
2641+
}
26372642
}
26382643

26392644
#endif /* !defined(SRE_RECURSIVE) */

0 commit comments

Comments
 (0)