Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ebc37b2

Browse files
author
Fredrik Lundh
committed
-- properly reset groups in findall (bug #117612)
-- fixed negative lookbehind to work correctly at the beginning of the target string (bug #117242)
-- improved syntax check; you can no longer refer to a group inside itself (bug #110866)
1 parent 8fdb638 commit ebc37b2

3 files changed

Lines changed: 33 additions & 17 deletions

File tree

Lib/sre_parse.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,14 +62,20 @@ class Pattern:
6262
# master pattern object. keeps track of global attributes
6363
def __init__(self):
6464
self.flags = 0
65+
self.open = []
6566
self.groups = 1
6667
self.groupdict = {}
67-
def getgroup(self, name=None):
68+
def opengroup(self, name=None):
6869
gid = self.groups
6970
self.groups = gid + 1
7071
if name:
7172
self.groupdict[name] = gid
73+
self.open.append(gid)
7274
return gid
75+
def closegroup(self, gid):
76+
self.open.remove(gid)
77+
def checkgroup(self, gid):
78+
return gid < self.groups and gid not in self.open
7379

7480
class SubPattern:
7581
# a subpattern, in intermediate form
@@ -278,6 +284,8 @@ def _escape(source, escape, state):
278284
# got at least one decimal digit; this is a group reference
279285
group = _group(escape, state.groups)
280286
if group:
287+
if not state.checkgroup(group):
288+
raise error, "cannot refer to open group"
281289
return GROUPREF, group
282290
raise ValueError
283291
if len(escape) == 2:
@@ -547,10 +555,12 @@ def _parse(source, state):
547555
# anonymous group
548556
group = None
549557
else:
550-
group = state.getgroup(name)
558+
group = state.opengroup(name)
551559
p = _parse_sub(source, state)
552560
if not source.match(")"):
553561
raise error, "unbalanced parenthesis"
562+
if group is not None:
563+
state.closegroup(group)
554564
subpattern.append((SUBPATTERN, (group, p)))
555565
else:
556566
while 1:

Lib/test/test_sre.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,9 @@ def bump_num(matchobj):
167167
[(":", ""), (":", ":"), (":", "::")])
168168
test(r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
169169

170+
# bug 117612
171+
test(r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
172+
170173
if verbose:
171174
print "Running tests on sre.match"
172175

Modules/_sre.c

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,14 @@
55
*
66
* partial history:
77
* 1999-10-24 fl created (based on existing template matcher code)
8-
* 2000-03-06 fl first alpha, sort of (0.5)
9-
* 2000-06-30 fl added fast search optimization (0.9.3)
10-
* 2000-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
11-
* 2000-07-02 fl added charset optimizations, etc (0.9.5)
8+
* 2000-03-06 fl first alpha, sort of
9+
* 2000-06-30 fl added fast search optimization
10+
* 2000-06-30 fl added assert (lookahead) primitives, etc
11+
* 2000-07-02 fl added charset optimizations, etc
1212
* 2000-07-03 fl store code in pattern object, lookbehind, etc
1313
* 2000-07-08 fl added regs attribute
14-
* 2000-07-21 fl reset lastindex in scanner methods (0.9.6)
15-
* 2000-08-01 fl fixes for 1.6b1 (0.9.8)
14+
* 2000-07-21 fl reset lastindex in scanner methods
15+
* 2000-08-01 fl fixes for 1.6b1
1616
* 2000-08-03 fl added recursion limit
1717
* 2000-08-07 fl use PyOS_CheckStack() if available
1818
* 2000-08-08 fl changed findall to return empty strings instead of None
@@ -21,6 +21,7 @@
2121
* 2000-09-20 fl added expand method
2222
* 2000-09-21 fl don't use the buffer interface for unicode strings
2323
* 2000-10-03 fl fixed assert_not primitive; support keyword arguments
24+
* 2000-10-24 fl really fixed assert_not; reset groups in findall
2425
*
2526
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
2627
*
@@ -35,7 +36,7 @@
3536

3637
#ifndef SRE_RECURSIVE
3738

38-
char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
39+
char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2000 by Secret Labs AB ";
3940

4041
#include "Python.h"
4142

@@ -783,13 +784,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
783784
/* <ASSERT_NOT> <skip> <back> <pattern> */
784785
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
785786
state->ptr = ptr - pattern[1];
786-
if (state->ptr < state->beginning)
787-
return 0;
788-
i = SRE_MATCH(state, pattern + 2, level + 1);
789-
if (i < 0)
790-
return i;
791-
if (i)
792-
return 0;
787+
if (state->ptr >= state->beginning) {
788+
i = SRE_MATCH(state, pattern + 2, level + 1);
789+
if (i < 0)
790+
return i;
791+
if (i)
792+
return 0;
793+
}
793794
pattern += pattern[0];
794795
break;
795796

@@ -1199,7 +1200,7 @@ _compile(PyObject* self_, PyObject* args)
11991200
n = PySequence_Length(code);
12001201
#endif
12011202

1202-
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1203+
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
12031204
if (!self) {
12041205
Py_DECREF(code);
12051206
return NULL;
@@ -1680,6 +1681,8 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
16801681

16811682
PyObject* item;
16821683

1684+
state_reset(&state);
1685+
16831686
state.ptr = state.start;
16841687

16851688
if (state.charsize == 1) {

0 commit comments

Comments
 (0)