Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 72b82ba

Browse files
author
Fredrik Lundh
committed
- fixed grouping error bug
- changed "group" operator to "groupref"
1 parent 6f01398 commit 72b82ba

6 files changed

Lines changed: 55 additions & 31 deletions

File tree

Lib/sre_compile.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,18 +148,25 @@ def fixup(literal, flags=flags):
148148
skip = len(code); emit(0)
149149
emit(av[0])
150150
emit(av[1])
151+
mark = MAXCODE
152+
if av[2][0][0] == SUBPATTERN:
153+
# repeated subpattern
154+
gid, foo = av[2][0][1]
155+
if gid:
156+
mark = (gid-1)*2
157+
emit(mark)
151158
_compile(code, av[2], flags)
152159
emit(OPCODES[SUCCESS])
153160
code[skip] = len(code) - skip
154161
elif op is SUBPATTERN:
155-
group = av[0]
156-
if group:
162+
gid = av[0]
163+
if gid:
157164
emit(OPCODES[MARK])
158-
emit((group-1)*2)
165+
emit((gid-1)*2)
159166
_compile(code, av[1], flags)
160-
if group:
167+
if gid:
161168
emit(OPCODES[MARK])
162-
emit((group-1)*2+1)
169+
emit((gid-1)*2+1)
163170
elif op in (SUCCESS, FAILURE):
164171
emit(OPCODES[op])
165172
elif op in (ASSERT, ASSERT_NOT):
@@ -207,7 +214,7 @@ def fixup(literal, flags=flags):
207214
emit(CHCODES[CH_UNICODE[av]])
208215
else:
209216
emit(CHCODES[av])
210-
elif op is GROUP:
217+
elif op is GROUPREF:
211218
if flags & SRE_FLAG_IGNORECASE:
212219
emit(OPCODES[OP_IGNORE[op]])
213220
else:

Lib/sre_constants.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ class error(Exception):
2929
CALL = "call"
3030
CATEGORY = "category"
3131
CHARSET = "charset"
32-
GROUP = "group"
33-
GROUP_IGNORE = "group_ignore"
32+
GROUPREF = "groupref"
33+
GROUPREF_IGNORE = "groupref_ignore"
3434
IN = "in"
3535
IN_IGNORE = "in_ignore"
3636
INDEX = "index"
@@ -90,7 +90,7 @@ class error(Exception):
9090
CALL,
9191
CATEGORY,
9292
CHARSET,
93-
GROUP, GROUP_IGNORE,
93+
GROUPREF, GROUPREF_IGNORE,
9494
INDEX,
9595
IN, IN_IGNORE,
9696
INFO,
@@ -136,7 +136,7 @@ def makedict(list):
136136

137137
# replacement operations for "ignore case" mode
138138
OP_IGNORE = {
139-
GROUP: GROUP_IGNORE,
139+
GROUPREF: GROUPREF_IGNORE,
140140
IN: IN_IGNORE,
141141
LITERAL: LITERAL_IGNORE,
142142
NOT_LITERAL: NOT_LITERAL_IGNORE

Lib/sre_parse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def _escape(source, escape, state):
241241
if group:
242242
if (not source.next or
243243
not _group(escape + source.next, state.groups)):
244-
return GROUP, group
244+
return GROUPREF, group
245245
escape = escape + source.get()
246246
elif source.next in OCTDIGITS:
247247
escape = escape + source.get()
@@ -450,7 +450,7 @@ def _parse(source, state):
450450
gid = state.groupdict.get(name)
451451
if gid is None:
452452
raise error, "unknown group name"
453-
subpattern.append((GROUP, gid))
453+
subpattern.append((GROUPREF, gid))
454454
elif source.match("#"):
455455
index = ""
456456
while 1:

Lib/test/output/test_sre

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
test_sre
22
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
33
=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
4-
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
5-
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
6-
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
74
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')

Modules/_sre.c

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
406406
int stackbase;
407407
int lastmark;
408408
int i, count;
409+
SRE_STACK* sp;
409410

410411
/* FIXME: this is a hack! */
411412
void* mark_copy[SRE_MARK_SIZE];
@@ -571,8 +572,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
571572
/* set mark */
572573
/* args: <mark> */
573574
TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
574-
if (state->lastmark < pattern[0])
575-
state->lastmark = pattern[0];
575+
if (state->lastmark < pattern[0]+1)
576+
state->lastmark = pattern[0]+1;
576577
if (!mark) {
577578
mark = mark_copy;
578579
memcpy(mark, state->mark, state->lastmark*sizeof(void*));
@@ -780,10 +781,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
780781
#endif
781782

782783
case SRE_OP_MAX_REPEAT:
783-
/* match repeated sequence (maximizing regexp). repeated
784-
group should end with a MAX_UNTIL code */
785-
786-
/* args: <skip> <min> <max> <item> */
784+
/* match repeated sequence (maximizing regexp) */
785+
/* args: <skip> <1=min> <2=max> <3=save> <4=item> */
787786

788787
TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
789788
pattern[1], pattern[2]));
@@ -793,7 +792,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
793792

794793
/* match minimum number of items */
795794
while (count < (int) pattern[1]) {
796-
i = SRE_MATCH(state, pattern + 3);
795+
i = SRE_MATCH(state, pattern + 4);
797796
if (i < 0)
798797
return i;
799798
if (!i)
@@ -817,8 +816,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
817816
points to the stack */
818817

819818
while (pattern[2] == 65535 || count < (int) pattern[2]) {
819+
void *mark0, *mark1;
820+
if (pattern[3] != 65535) {
821+
mark0 = state->mark[pattern[3]];
822+
mark1 = state->mark[pattern[3]+1];
823+
}
820824
state->stackbase = stack;
821-
i = SRE_MATCH(state, pattern + 3);
825+
i = SRE_MATCH(state, pattern + 4);
822826
state->stackbase = stackbase; /* rewind */
823827
if (i < 0)
824828
return i;
@@ -837,8 +841,14 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
837841
return i; /* out of memory */
838842
}
839843
TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
840-
state->stack[stack].ptr = ptr;
841-
state->stack[stack].pattern = pattern + pattern[0];
844+
sp = state->stack + stack;
845+
sp->ptr = ptr;
846+
sp->pattern = pattern + pattern[0];
847+
sp->mark = pattern[3];
848+
if (pattern[3] != 65535) {
849+
sp->mark0 = mark0;
850+
sp->mark1 = mark1;
851+
}
842852
stack++;
843853
/* move forward */
844854
ptr = state->ptr;
@@ -855,13 +865,15 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
855865

856866
case SRE_OP_MIN_REPEAT:
857867
/* match repeated sequence (minimizing regexp) */
868+
/* args: <skip> <1=min> <2=max> <3=save> <4=item> */
869+
858870
TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
859871
pattern[1], pattern[2]));
860872
count = 0;
861873
state->ptr = ptr;
862874
/* match minimum number of items */
863875
while (count < (int) pattern[1]) {
864-
i = SRE_MATCH(state, pattern + 3);
876+
i = SRE_MATCH(state, pattern + 4);
865877
if (i < 0)
866878
return i;
867879
if (!i)
@@ -877,7 +889,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
877889
goto success;
878890
}
879891
state->ptr = ptr; /* backtrack */
880-
i = SRE_MATCH(state, pattern + 3);
892+
i = SRE_MATCH(state, pattern + 4);
881893
if (i < 0)
882894
return i;
883895
if (!i)
@@ -940,15 +952,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
940952
}
941953

942954
failure:
955+
TRACE(("%8d: leave (failure)\n", PTR(ptr)));
943956
if (stack-- > stackbase) {
944-
ptr = state->stack[stack].ptr;
945-
pattern = state->stack[stack].pattern;
957+
sp = state->stack + stack;
958+
ptr = sp->ptr;
959+
pattern = sp->pattern;
960+
if (sp->mark != 65535) {
961+
state->mark[sp->mark] = sp->mark0;
962+
state->mark[sp->mark+1] = sp->mark1;
963+
}
946964
TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
947965
goto retry;
948966
}
949-
TRACE(("%8d: leave (failure)\n", PTR(ptr)));
950-
state->stackbase = stackbase;
951967
state->lastmark = lastmark;
968+
state->stackbase = stackbase;
952969
if (mark)
953970
memcpy(state->mark, mark, state->lastmark*sizeof(void*));
954971
return 0;

Modules/sre.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ typedef struct {
4646
/* stack elements */
4747
SRE_CODE* pattern;
4848
void* ptr;
49+
int mark;
50+
void* mark0;
51+
void* mark1;
4952
} SRE_STACK;
5053

5154
/* FIXME: <fl> shouldn't be a constant, really... */

0 commit comments

Comments
 (0)