Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e186983

Browse files
author
Fredrik Lundh
committed
final 0.9.8 updates:
-- added REPEAT_ONE operator
-- added ANY_ALL operator (used to represent "(?s).")
1 parent fb06539 commit e186983

6 files changed

Lines changed: 105 additions & 68 deletions

File tree

Lib/sre.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,10 @@ def _compile(pattern, flags=0):
9898
return _cache[key]
9999
except KeyError:
100100
pass
101-
p = sre_compile.compile(pattern, flags)
101+
try:
102+
p = sre_compile.compile(pattern, flags)
103+
except error, v:
104+
raise error, v # invalid expression
102105
if len(_cache) >= _MAXCACHE:
103106
_cache.clear()
104107
_cache[key] = p

Lib/sre_compile.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ def _charset(charset, fixup=None):
7373
return out
7474
return charset
7575

76+
def _simple(av):
77+
# check if av is a "simple" operator
78+
lo, hi = av[2].getwidth()
79+
if lo == 0:
80+
raise error, "nothing to repeat"
81+
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
82+
7683
def _compile(code, pattern, flags):
7784
# internal: compile a (sub)pattern
7885
emit = code.append
@@ -116,10 +123,9 @@ def fixup(literal, flags=flags):
116123
code[skip] = len(code) - skip
117124
elif op is ANY:
118125
if flags & SRE_FLAG_DOTALL:
119-
emit(OPCODES[op])
126+
emit(OPCODES[ANY_ALL])
120127
else:
121-
emit(OPCODES[CATEGORY])
122-
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
128+
emit(OPCODES[ANY])
123129
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
124130
if flags & SRE_FLAG_TEMPLATE:
125131
raise error, "internal: unsupported template operator"
@@ -130,30 +136,25 @@ def fixup(literal, flags=flags):
130136
_compile(code, av[2], flags)
131137
emit(OPCODES[SUCCESS])
132138
code[skip] = len(code) - skip
139+
elif _simple(av) and op == MAX_REPEAT:
140+
emit(OPCODES[REPEAT_ONE])
141+
skip = len(code); emit(0)
142+
emit(av[0])
143+
emit(av[1])
144+
_compile(code, av[2], flags)
145+
emit(OPCODES[SUCCESS])
146+
code[skip] = len(code) - skip
133147
else:
134-
lo, hi = av[2].getwidth()
135-
if lo == 0:
136-
raise error, "nothing to repeat"
137-
if 0 and lo == hi == 1 and op is MAX_REPEAT:
138-
# FIXME: <fl> fast and wrong (but we'll fix that)
139-
emit(OPCODES[REPEAT_ONE])
140-
skip = len(code); emit(0)
141-
emit(av[0])
142-
emit(av[1])
143-
_compile(code, av[2], flags)
144-
emit(OPCODES[SUCCESS])
145-
code[skip] = len(code) - skip
148+
emit(OPCODES[REPEAT])
149+
skip = len(code); emit(0)
150+
emit(av[0])
151+
emit(av[1])
152+
_compile(code, av[2], flags)
153+
code[skip] = len(code) - skip
154+
if op == MAX_REPEAT:
155+
emit(OPCODES[MAX_UNTIL])
146156
else:
147-
emit(OPCODES[REPEAT])
148-
skip = len(code); emit(0)
149-
emit(av[0])
150-
emit(av[1])
151-
_compile(code, av[2], flags)
152-
code[skip] = len(code) - skip
153-
if op == MAX_REPEAT:
154-
emit(OPCODES[MAX_UNTIL])
155-
else:
156-
emit(OPCODES[MIN_UNTIL])
157+
emit(OPCODES[MIN_UNTIL])
157158
elif op is SUBPATTERN:
158159
if av[0]:
159160
emit(OPCODES[MARK])

Lib/sre_constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class error(Exception):
2020
SUCCESS = "success"
2121

2222
ANY = "any"
23+
ANY_ALL = "any_all"
2324
ASSERT = "assert"
2425
ASSERT_NOT = "assert_not"
2526
AT = "at"
@@ -81,7 +82,7 @@ class error(Exception):
8182
# failure=0 success=1 (just because it looks better that way :-)
8283
FAILURE, SUCCESS,
8384

84-
ANY,
85+
ANY, ANY_ALL,
8586
ASSERT, ASSERT_NOT,
8687
AT,
8788
BRANCH,

Lib/sre_parse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def getwidth(self):
142142
for av in av[1]:
143143
l, h = av.getwidth()
144144
i = min(i, l)
145-
j = min(j, h)
145+
j = max(j, h)
146146
lo = lo + i
147147
hi = hi + j
148148
elif op is CALL:

Modules/_sre.c

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
448448
int i, count;
449449
SRE_REPEAT* rp;
450450
int lastmark;
451+
SRE_CODE chr;
451452

452453
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
453454

@@ -525,8 +526,17 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
525526
break;
526527

527528
case SRE_OP_ANY:
528-
/* match anything */
529+
/* match anything (except a newline) */
529530
/* <ANY> */
531+
TRACE(("%8d: anything (except newline)\n", PTR(ptr)));
532+
if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
533+
return 0;
534+
ptr++;
535+
break;
536+
537+
case SRE_OP_ANY_ALL:
538+
/* match anything */
539+
/* <ANY_ALL> */
530540
TRACE(("%8d: anything\n", PTR(ptr)));
531541
if (ptr >= end)
532542
return 0;
@@ -695,69 +705,89 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
695705
TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
696706
pattern[1], pattern[2]));
697707

708+
if (ptr + pattern[1] > end)
709+
return 0; /* cannot match */
710+
698711
count = 0;
699712

700-
if (pattern[3] == SRE_OP_ANY) {
713+
switch (pattern[3]) {
714+
715+
case SRE_OP_ANY:
716+
/* repeated wildcard. */
717+
while (count < (int) pattern[2]) {
718+
if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
719+
break;
720+
ptr++;
721+
count++;
722+
}
723+
break;
724+
725+
case SRE_OP_ANY_ALL:
701726
/* repeated wildcard. skip to the end of the target
702727
string, and backtrack from there */
703-
/* FIXME: must look for line endings */
704728
if (ptr + pattern[1] > end)
705729
return 0; /* cannot match */
706730
count = pattern[2];
707731
if (count > end - ptr)
708732
count = end - ptr;
709733
ptr += count;
734+
break;
710735

711-
} else if (pattern[3] == SRE_OP_LITERAL) {
736+
case SRE_OP_LITERAL:
712737
/* repeated literal */
713-
SRE_CODE chr = pattern[4];
738+
chr = pattern[4];
714739
while (count < (int) pattern[2]) {
715740
if (ptr >= end || (SRE_CODE) ptr[0] != chr)
716741
break;
717742
ptr++;
718743
count++;
719744
}
745+
break;
720746

721-
} else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
747+
case SRE_OP_LITERAL_IGNORE:
722748
/* repeated literal */
723-
SRE_CODE chr = pattern[4];
749+
chr = pattern[4];
724750
while (count < (int) pattern[2]) {
725751
if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
726752
break;
727753
ptr++;
728754
count++;
729755
}
756+
break;
730757

731-
} else if (pattern[3] == SRE_OP_NOT_LITERAL) {
758+
case SRE_OP_NOT_LITERAL:
732759
/* repeated non-literal */
733-
SRE_CODE chr = pattern[4];
760+
chr = pattern[4];
734761
while (count < (int) pattern[2]) {
735762
if (ptr >= end || (SRE_CODE) ptr[0] == chr)
736763
break;
737764
ptr++;
738765
count++;
739766
}
740-
741-
} else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
767+
break;
768+
769+
case SRE_OP_NOT_LITERAL_IGNORE:
742770
/* repeated non-literal */
743-
SRE_CODE chr = pattern[4];
771+
chr = pattern[4];
744772
while (count < (int) pattern[2]) {
745773
if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
746774
break;
747775
ptr++;
748776
count++;
749777
}
778+
break;
750779

751-
} else if (pattern[3] == SRE_OP_IN) {
780+
case SRE_OP_IN:
752781
/* repeated set */
753782
while (count < (int) pattern[2]) {
754783
if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
755784
break;
756785
ptr++;
757786
count++;
758787
}
788+
break;
759789

760-
} else {
790+
default:
761791
/* repeated single character pattern */
762792
state->ptr = ptr;
763793
while (count < (int) pattern[2]) {
@@ -770,6 +800,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
770800
}
771801
state->ptr = ptr;
772802
ptr += count;
803+
break;
773804
}
774805

775806
/* when we arrive here, count contains the number of
@@ -791,7 +822,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
791822
} else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
792823
/* tail starts with a literal. skip positions where
793824
the rest of the pattern cannot possibly match */
794-
SRE_CODE chr = pattern[pattern[0]+1];
825+
chr = pattern[pattern[0]+1];
795826
TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
796827
for (;;) {
797828
TRACE(("%8d: scan for tail match\n", PTR(ptr)));

Modules/sre_constants.h

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,32 @@
1414
#define SRE_OP_FAILURE 0
1515
#define SRE_OP_SUCCESS 1
1616
#define SRE_OP_ANY 2
17-
#define SRE_OP_ASSERT 3
18-
#define SRE_OP_ASSERT_NOT 4
19-
#define SRE_OP_AT 5
20-
#define SRE_OP_BRANCH 6
21-
#define SRE_OP_CALL 7
22-
#define SRE_OP_CATEGORY 8
23-
#define SRE_OP_CHARSET 9
24-
#define SRE_OP_GROUPREF 10
25-
#define SRE_OP_GROUPREF_IGNORE 11
26-
#define SRE_OP_IN 12
27-
#define SRE_OP_IN_IGNORE 13
28-
#define SRE_OP_INFO 14
29-
#define SRE_OP_JUMP 15
30-
#define SRE_OP_LITERAL 16
31-
#define SRE_OP_LITERAL_IGNORE 17
32-
#define SRE_OP_MARK 18
33-
#define SRE_OP_MAX_UNTIL 19
34-
#define SRE_OP_MIN_UNTIL 20
35-
#define SRE_OP_NOT_LITERAL 21
36-
#define SRE_OP_NOT_LITERAL_IGNORE 22
37-
#define SRE_OP_NEGATE 23
38-
#define SRE_OP_RANGE 24
39-
#define SRE_OP_REPEAT 25
40-
#define SRE_OP_REPEAT_ONE 26
41-
#define SRE_OP_SUBPATTERN 27
17+
#define SRE_OP_ANY_ALL 3
18+
#define SRE_OP_ASSERT 4
19+
#define SRE_OP_ASSERT_NOT 5
20+
#define SRE_OP_AT 6
21+
#define SRE_OP_BRANCH 7
22+
#define SRE_OP_CALL 8
23+
#define SRE_OP_CATEGORY 9
24+
#define SRE_OP_CHARSET 10
25+
#define SRE_OP_GROUPREF 11
26+
#define SRE_OP_GROUPREF_IGNORE 12
27+
#define SRE_OP_IN 13
28+
#define SRE_OP_IN_IGNORE 14
29+
#define SRE_OP_INFO 15
30+
#define SRE_OP_JUMP 16
31+
#define SRE_OP_LITERAL 17
32+
#define SRE_OP_LITERAL_IGNORE 18
33+
#define SRE_OP_MARK 19
34+
#define SRE_OP_MAX_UNTIL 20
35+
#define SRE_OP_MIN_UNTIL 21
36+
#define SRE_OP_NOT_LITERAL 22
37+
#define SRE_OP_NOT_LITERAL_IGNORE 23
38+
#define SRE_OP_NEGATE 24
39+
#define SRE_OP_RANGE 25
40+
#define SRE_OP_REPEAT 26
41+
#define SRE_OP_REPEAT_ONE 27
42+
#define SRE_OP_SUBPATTERN 28
4243
#define SRE_AT_BEGINNING 0
4344
#define SRE_AT_BEGINNING_LINE 1
4445
#define SRE_AT_BOUNDARY 2

0 commit comments

Comments
 (0)