Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 43b3b49

Browse files
author
Fredrik Lundh
committed
- fixed lookahead assertions (#10, #11, #12)
- untabified sre_constants.py
1 parent a4657f7 commit 43b3b49

6 files changed

Lines changed: 146 additions & 90 deletions

File tree

Lib/sre_compile.py

Lines changed: 50 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,52 +26,12 @@ def _compile(code, pattern, flags):
2626
# internal: compile a (sub)pattern
2727
emit = code.append
2828
for op, av in pattern:
29-
if op is ANY:
30-
if flags & SRE_FLAG_DOTALL:
31-
emit(OPCODES[op])
32-
else:
33-
emit(OPCODES[CATEGORY])
34-
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
35-
elif op in (SUCCESS, FAILURE):
36-
emit(OPCODES[op])
37-
elif op is AT:
38-
emit(OPCODES[op])
39-
if flags & SRE_FLAG_MULTILINE:
40-
emit(ATCODES[AT_MULTILINE[av]])
41-
else:
42-
emit(ATCODES[av])
43-
elif op is BRANCH:
44-
emit(OPCODES[op])
45-
tail = []
46-
for av in av[1]:
47-
skip = len(code); emit(0)
48-
_compile(code, av, flags)
49-
emit(OPCODES[JUMP])
50-
tail.append(len(code)); emit(0)
51-
code[skip] = len(code) - skip
52-
emit(0) # end of branch
53-
for tail in tail:
54-
code[tail] = len(code) - tail
55-
elif op is CALL:
56-
emit(OPCODES[op])
57-
skip = len(code); emit(0)
58-
_compile(code, av, flags)
59-
emit(OPCODES[SUCCESS])
60-
code[skip] = len(code) - skip
61-
elif op is CATEGORY:
62-
emit(OPCODES[op])
63-
if flags & SRE_FLAG_LOCALE:
64-
emit(CHCODES[CH_LOCALE[av]])
65-
elif flags & SRE_FLAG_UNICODE:
66-
emit(CHCODES[CH_UNICODE[av]])
67-
else:
68-
emit(CHCODES[av])
69-
elif op is GROUP:
29+
if op in (LITERAL, NOT_LITERAL):
7030
if flags & SRE_FLAG_IGNORECASE:
7131
emit(OPCODES[OP_IGNORE[op]])
7232
else:
7333
emit(OPCODES[op])
74-
emit(av-1)
34+
emit(ord(av))
7535
elif op is IN:
7636
if flags & SRE_FLAG_IGNORECASE:
7737
emit(OPCODES[OP_IGNORE[op]])
@@ -101,15 +61,12 @@ def fixup(literal, flags=flags):
10161
raise error, "internal: unsupported set operator"
10262
emit(OPCODES[FAILURE])
10363
code[skip] = len(code) - skip
104-
elif op in (LITERAL, NOT_LITERAL):
105-
if flags & SRE_FLAG_IGNORECASE:
106-
emit(OPCODES[OP_IGNORE[op]])
107-
else:
64+
elif op is ANY:
65+
if flags & SRE_FLAG_DOTALL:
10866
emit(OPCODES[op])
109-
emit(ord(av))
110-
elif op is MARK:
111-
emit(OPCODES[op])
112-
emit(av)
67+
else:
68+
emit(OPCODES[CATEGORY])
69+
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
11370
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
11471
if flags & SRE_FLAG_TEMPLATE:
11572
emit(OPCODES[REPEAT])
@@ -150,6 +107,49 @@ def fixup(literal, flags=flags):
150107
if group:
151108
emit(OPCODES[MARK])
152109
emit((group-1)*2+1)
110+
elif op in (SUCCESS, FAILURE):
111+
emit(OPCODES[op])
112+
elif op in (ASSERT, ASSERT_NOT, CALL):
113+
emit(OPCODES[op])
114+
skip = len(code); emit(0)
115+
_compile(code, av, flags)
116+
emit(OPCODES[SUCCESS])
117+
code[skip] = len(code) - skip
118+
elif op is AT:
119+
emit(OPCODES[op])
120+
if flags & SRE_FLAG_MULTILINE:
121+
emit(ATCODES[AT_MULTILINE[av]])
122+
else:
123+
emit(ATCODES[av])
124+
elif op is BRANCH:
125+
emit(OPCODES[op])
126+
tail = []
127+
for av in av[1]:
128+
skip = len(code); emit(0)
129+
_compile(code, av, flags)
130+
emit(OPCODES[JUMP])
131+
tail.append(len(code)); emit(0)
132+
code[skip] = len(code) - skip
133+
emit(0) # end of branch
134+
for tail in tail:
135+
code[tail] = len(code) - tail
136+
elif op is CATEGORY:
137+
emit(OPCODES[op])
138+
if flags & SRE_FLAG_LOCALE:
139+
emit(CHCODES[CH_LOCALE[av]])
140+
elif flags & SRE_FLAG_UNICODE:
141+
emit(CHCODES[CH_UNICODE[av]])
142+
else:
143+
emit(CHCODES[av])
144+
elif op is GROUP:
145+
if flags & SRE_FLAG_IGNORECASE:
146+
emit(OPCODES[OP_IGNORE[op]])
147+
else:
148+
emit(OPCODES[op])
149+
emit(av-1)
150+
elif op is MARK:
151+
emit(OPCODES[op])
152+
emit(av)
153153
else:
154154
raise ValueError, ("unsupported operand type", op)
155155

Lib/sre_constants.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class error(Exception):
2323

2424
ANY = "any"
2525
ASSERT = "assert"
26+
ASSERT_NOT = "assert_not"
2627
AT = "at"
2728
BRANCH = "branch"
2829
CALL = "call"
@@ -81,7 +82,7 @@ class error(Exception):
8182
FAILURE, SUCCESS,
8283

8384
ANY,
84-
ASSERT,
85+
ASSERT, ASSERT_NOT,
8586
AT,
8687
BRANCH,
8788
CALL,
@@ -121,8 +122,8 @@ def makedict(list):
121122
d = {}
122123
i = 0
123124
for item in list:
124-
d[item] = i
125-
i = i + 1
125+
d[item] = i
126+
i = i + 1
126127
return d
127128

128129
OPCODES = makedict(OPCODES)
@@ -176,12 +177,27 @@ def makedict(list):
176177
if __name__ == "__main__":
177178
import string
178179
def dump(f, d, prefix):
179-
items = d.items()
180-
items.sort(lambda a, b: cmp(a[1], b[1]))
181-
for k, v in items:
182-
f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
180+
items = d.items()
181+
items.sort(lambda a, b: cmp(a[1], b[1]))
182+
for k, v in items:
183+
f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
183184
f = open("sre_constants.h", "w")
184-
f.write("/* generated from sre_constants.py */\n")
185+
f.write("""\
186+
/*
187+
* Secret Labs' Regular Expression Engine
188+
*
189+
* regular expression matching engine
190+
*
191+
* NOTE: This file is generated by sre_constants.py. If you need
192+
* to change anything in here, edit sre_constants.py and run it.
193+
*
194+
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
195+
*
196+
* See the _sre.c file for information on usage and redistribution.
197+
*/
198+
199+
""")
200+
185201
dump(f, OPCODES, "SRE_OP")
186202
dump(f, ATCODES, "SRE")
187203
dump(f, CHCODES, "SRE")

Lib/sre_parse.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,25 @@ def _parse(source, state, flags=0):
470470
if source.next is None or source.next == ")":
471471
break
472472
source.get()
473+
elif source.next in ("=", "!"):
474+
# lookahead assertions
475+
char = source.get()
476+
b = []
477+
while 1:
478+
p = _parse(source, state, flags)
479+
if source.next == ")":
480+
if b:
481+
b.append(p)
482+
p = _branch(state, b)
483+
if char == "=":
484+
subpattern.append((ASSERT, p))
485+
else:
486+
subpattern.append((ASSERT_NOT, p))
487+
break
488+
elif source.match("|"):
489+
b.append(p)
490+
else:
491+
raise error, "pattern not properly closed"
473492
else:
474493
# flags
475494
while FLAGS.has_key(source.next):

Lib/test/output/test_sre

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,4 @@ test_support -- test failed re module cPickle
66
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
77
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
88
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
9-
=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
10-
=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
11-
=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
129
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')

Modules/_sre.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
* 00-06-28 fl fixed findall (0.9.1)
2121
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
2222
* 00-06-30 fl tuning, fast search (0.9.3)
23+
* 00-06-30 fl added assert (lookahead) primitives (0.9.4)
2324
*
2425
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
2526
*
@@ -30,7 +31,7 @@
3031

3132
#ifndef SRE_RECURSIVE
3233

33-
char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
34+
char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
3435

3536
#include "Python.h"
3637

@@ -576,21 +577,31 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
576577
pattern += pattern[0];
577578
break;
578579

579-
#if 0
580-
case SRE_OP_CALL:
581-
/* match subpattern, without backtracking */
580+
case SRE_OP_ASSERT:
581+
/* assert subpattern */
582582
/* args: <skip> <pattern> */
583-
TRACE(("%8d: subpattern\n", PTR(ptr)));
583+
TRACE(("%8d: assert subpattern\n", PTR(ptr)));
584584
state->ptr = ptr;
585585
i = SRE_MATCH(state, pattern + 1);
586586
if (i < 0)
587587
return i;
588588
if (!i)
589589
goto failure;
590590
pattern += pattern[0];
591-
ptr = state->ptr;
592591
break;
593-
#endif
592+
593+
case SRE_OP_ASSERT_NOT:
594+
/* assert not subpattern */
595+
/* args: <skip> <pattern> */
596+
TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
597+
state->ptr = ptr;
598+
i = SRE_MATCH(state, pattern + 1);
599+
if (i < 0)
600+
return i;
601+
if (i)
602+
goto failure;
603+
pattern += pattern[0];
604+
break;
594605

595606
#if 0
596607
case SRE_OP_MAX_REPEAT_ONE:

Modules/sre_constants.h

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,42 @@
1-
/* generated from sre_constants.py */
1+
/*
2+
* Secret Labs' Regular Expression Engine
3+
*
4+
* regular expression matching engine
5+
*
6+
* NOTE: This file is generated by sre_constants.py. If you need
7+
* to change anything in here, edit sre_constants.py and run it.
8+
*
9+
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
10+
*
11+
* See the _sre.c file for information on usage and redistribution.
12+
*/
13+
214
#define SRE_OP_FAILURE 0
315
#define SRE_OP_SUCCESS 1
416
#define SRE_OP_ANY 2
517
#define SRE_OP_ASSERT 3
6-
#define SRE_OP_AT 4
7-
#define SRE_OP_BRANCH 5
8-
#define SRE_OP_CALL 6
9-
#define SRE_OP_CATEGORY 7
10-
#define SRE_OP_GROUP 8
11-
#define SRE_OP_GROUP_IGNORE 9
12-
#define SRE_OP_IN 10
13-
#define SRE_OP_IN_IGNORE 11
14-
#define SRE_OP_INFO 12
15-
#define SRE_OP_JUMP 13
16-
#define SRE_OP_LITERAL 14
17-
#define SRE_OP_LITERAL_IGNORE 15
18-
#define SRE_OP_MARK 16
19-
#define SRE_OP_MAX_REPEAT 17
20-
#define SRE_OP_MAX_REPEAT_ONE 18
21-
#define SRE_OP_MIN_REPEAT 19
22-
#define SRE_OP_NOT_LITERAL 20
23-
#define SRE_OP_NOT_LITERAL_IGNORE 21
24-
#define SRE_OP_NEGATE 22
25-
#define SRE_OP_RANGE 23
26-
#define SRE_OP_REPEAT 24
18+
#define SRE_OP_ASSERT_NOT 4
19+
#define SRE_OP_AT 5
20+
#define SRE_OP_BRANCH 6
21+
#define SRE_OP_CALL 7
22+
#define SRE_OP_CATEGORY 8
23+
#define SRE_OP_GROUP 9
24+
#define SRE_OP_GROUP_IGNORE 10
25+
#define SRE_OP_IN 11
26+
#define SRE_OP_IN_IGNORE 12
27+
#define SRE_OP_INFO 13
28+
#define SRE_OP_JUMP 14
29+
#define SRE_OP_LITERAL 15
30+
#define SRE_OP_LITERAL_IGNORE 16
31+
#define SRE_OP_MARK 17
32+
#define SRE_OP_MAX_REPEAT 18
33+
#define SRE_OP_MAX_REPEAT_ONE 19
34+
#define SRE_OP_MIN_REPEAT 20
35+
#define SRE_OP_NOT_LITERAL 21
36+
#define SRE_OP_NOT_LITERAL_IGNORE 22
37+
#define SRE_OP_NEGATE 23
38+
#define SRE_OP_RANGE 24
39+
#define SRE_OP_REPEAT 25
2740
#define SRE_AT_BEGINNING 0
2841
#define SRE_AT_BEGINNING_LINE 1
2942
#define SRE_AT_BOUNDARY 2

0 commit comments

Comments
 (0)