Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 1296a8d

Browse files
author
Fredrik Lundh
committed
sre.Scanner fixes (from Greg Chapman). also added a Scanner sanity
check to the test suite. added a few missing exception checks in the _sre module
1 parent bec95b9 commit 1296a8d

3 files changed

Lines changed: 43 additions & 5 deletions

File tree

Lib/sre.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,32 +330,33 @@ def _pickle(p):
330330
# experimental stuff (see python-dev discussions for details)
331331

332332
class Scanner:
333-
def __init__(self, lexicon):
333+
def __init__(self, lexicon, flags=0):
334334
from sre_constants import BRANCH, SUBPATTERN
335335
self.lexicon = lexicon
336336
# combine phrases into a compound pattern
337337
p = []
338338
s = sre_parse.Pattern()
339+
s.flags = flags
339340
for phrase, action in lexicon:
340341
p.append(sre_parse.SubPattern(s, [
341-
(SUBPATTERN, (len(p), sre_parse.parse(phrase))),
342+
(SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
342343
]))
343344
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
344345
s.groups = len(p)
345346
self.scanner = sre_compile.compile(p)
346347
def scan(self, string):
347348
result = []
348349
append = result.append
349-
match = self.scanner.match
350+
match = self.scanner.scanner(string).match
350351
i = 0
351352
while 1:
352-
m = match(string, i)
353+
m = match()
353354
if not m:
354355
break
355356
j = m.end()
356357
if i == j:
357358
break
358-
action = self.lexicon[m.lastindex][1]
359+
action = self.lexicon[m.lastindex-1][1]
359360
if callable(action):
360361
self.match = m
361362
action = action(self, m.group())

Lib/test/test_sre.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,26 @@ def bump_num(matchobj):
223223
test(r"""pat.match(p) is not None""", 1)
224224
test(r"""pat.match(p).span()""", (0,256))
225225

226+
if verbose:
227+
print 'Running tests on sre.Scanner'
228+
229+
def s_ident(scanner, token): return token
230+
def s_operator(scanner, token): return "op%s" % token
231+
def s_float(scanner, token): return float(token)
232+
def s_int(scanner, token): return int(token)
233+
234+
scanner = sre.Scanner([
235+
(r"[a-zA-Z_]\w*", s_ident),
236+
(r"\d+\.\d*", s_float),
237+
(r"\d+", s_int),
238+
(r"=|\+|-|\*|/", s_operator),
239+
(r"\s+", None),
240+
])
241+
242+
# sanity check
243+
test('scanner.scan("sum = 3*foo + 312.50 + bar")',
244+
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))
245+
226246
if verbose:
227247
print 'Pickling a SRE_Pattern instance'
228248

Modules/_sre.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1800,6 +1800,11 @@ join(PyObject* list, PyObject* pattern)
18001800
return NULL;
18011801
}
18021802
args = PyTuple_New(1);
1803+
if (!args) {
1804+
Py_DECREF(function);
1805+
Py_DECREF(joiner);
1806+
return NULL;
1807+
}
18031808
PyTuple_SET_ITEM(args, 0, list);
18041809
result = PyObject_CallObject(function, args);
18051810
Py_DECREF(args); /* also removes list */
@@ -1896,6 +1901,10 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
18961901
return NULL;
18971902

18981903
list = PyList_New(0);
1904+
if (!list) {
1905+
state_fini(&state);
1906+
return NULL;
1907+
}
18991908

19001909
while (state.start <= state.end) {
19011910

@@ -1995,6 +2004,10 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
19952004
return NULL;
19962005

19972006
list = PyList_New(0);
2007+
if (!list) {
2008+
state_fini(&state);
2009+
return NULL;
2010+
}
19982011

19992012
n = 0;
20002013
last = state.start;
@@ -2110,6 +2123,10 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
21102123
return NULL;
21112124

21122125
list = PyList_New(0);
2126+
if (!list) {
2127+
state_fini(&state);
2128+
return NULL;
2129+
}
21132130

21142131
n = i = 0;
21152132

0 commit comments

Comments
 (0)