Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit be2211e

Browse files
author
Fredrik Lundh
committed
- fixed split (test_sre still complains about split, but that's caused by the group reset bug, not split itself)
- added more mark slots (should be dynamically allocated, but 100 is better than 32, and checking for the upper limit is better than overwriting the memory ;-)
- internal: renamed the cursor helper class
- internal: removed some bloat from sre_compile
1 parent 6921817 commit be2211e

4 files changed

Lines changed: 116 additions & 130 deletions

File tree

Lib/sre.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
2727

2828
# sre exception
29-
error = sre_parse.error
29+
error = sre_compile.error
3030

3131
# --------------------------------------------------------------------
3232
# public interface
@@ -105,7 +105,7 @@ def filter(match, template=template):
105105
n = i = 0
106106
s = []
107107
append = s.append
108-
c = pattern.cursor(string)
108+
c = pattern.scanner(string)
109109
while not count or n < count:
110110
m = c.search()
111111
if not m:
@@ -127,16 +127,20 @@ def _split(pattern, string, maxsplit=0):
127127
n = i = 0
128128
s = []
129129
append = s.append
130-
c = pattern.cursor(string)
130+
extend = s.extend
131+
c = pattern.scanner(string)
132+
g = c.groups
131133
while not maxsplit or n < maxsplit:
132134
m = c.search()
133135
if not m:
134136
break
135-
j = m.start()
136-
append(string[i:j])
137-
i = m.end()
138-
if i <= j:
139-
break
137+
b, e = m.span()
138+
if e == i:
139+
continue
140+
append(string[i:b])
141+
if g and b != e:
142+
extend(m.groups())
143+
i = e
140144
n = n + 1
141145
if i < len(string):
142146
append(string[i:])

Lib/sre_compile.py

Lines changed: 66 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111
# other compatibility work.
1212
#
1313

14-
import array, string, sys
15-
14+
import array
1615
import _sre
1716

1817
from sre_constants import *
@@ -24,123 +23,101 @@
2423
else:
2524
raise RuntimeError, "cannot find a useable array type"
2625

27-
# FIXME: <fl> should move some optimizations from the parser to here!
28-
29-
class Code:
30-
def __init__(self):
31-
self.data = []
32-
def __len__(self):
33-
return len(self.data)
34-
def __getitem__(self, index):
35-
return self.data[index]
36-
def __setitem__(self, index, code):
37-
self.data[index] = code
38-
def append(self, code):
39-
self.data.append(code)
40-
def todata(self):
41-
# print self.data
42-
try:
43-
return array.array(WORDSIZE, self.data).tostring()
44-
except OverflowError:
45-
print self.data
46-
raise
47-
4826
def _compile(code, pattern, flags):
49-
append = code.append
27+
emit = code.append
5028
for op, av in pattern:
5129
if op is ANY:
5230
if flags & SRE_FLAG_DOTALL:
53-
append(OPCODES[op]) # any character at all!
31+
emit(OPCODES[op])
5432
else:
55-
append(OPCODES[CATEGORY])
56-
append(CHCODES[CATEGORY_NOT_LINEBREAK])
33+
emit(OPCODES[CATEGORY])
34+
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
5735
elif op in (SUCCESS, FAILURE):
58-
append(OPCODES[op])
36+
emit(OPCODES[op])
5937
elif op is AT:
60-
append(OPCODES[op])
38+
emit(OPCODES[op])
6139
if flags & SRE_FLAG_MULTILINE:
62-
append(ATCODES[AT_MULTILINE[av]])
40+
emit(ATCODES[AT_MULTILINE[av]])
6341
else:
64-
append(ATCODES[av])
42+
emit(ATCODES[av])
6543
elif op is BRANCH:
66-
append(OPCODES[op])
44+
emit(OPCODES[op])
6745
tail = []
6846
for av in av[1]:
69-
skip = len(code); append(0)
47+
skip = len(code); emit(0)
7048
_compile(code, av, flags)
71-
## append(OPCODES[SUCCESS])
72-
append(OPCODES[JUMP])
73-
tail.append(len(code)); append(0)
49+
emit(OPCODES[JUMP])
50+
tail.append(len(code)); emit(0)
7451
code[skip] = len(code) - skip
75-
append(0) # end of branch
52+
emit(0) # end of branch
7653
for tail in tail:
7754
code[tail] = len(code) - tail
7855
elif op is CALL:
79-
append(OPCODES[op])
80-
skip = len(code); append(0)
56+
emit(OPCODES[op])
57+
skip = len(code); emit(0)
8158
_compile(code, av, flags)
82-
append(OPCODES[SUCCESS])
59+
emit(OPCODES[SUCCESS])
8360
code[skip] = len(code) - skip
8461
elif op is CATEGORY:
85-
append(OPCODES[op])
62+
emit(OPCODES[op])
8663
if flags & SRE_FLAG_LOCALE:
87-
append(CH_LOCALE[CHCODES[av]])
64+
emit(CH_LOCALE[CHCODES[av]])
8865
elif flags & SRE_FLAG_UNICODE:
89-
append(CH_UNICODE[CHCODES[av]])
66+
emit(CH_UNICODE[CHCODES[av]])
9067
else:
91-
append(CHCODES[av])
68+
emit(CHCODES[av])
9269
elif op is GROUP:
9370
if flags & SRE_FLAG_IGNORECASE:
94-
append(OPCODES[OP_IGNORE[op]])
71+
emit(OPCODES[OP_IGNORE[op]])
9572
else:
96-
append(OPCODES[op])
97-
append(av-1)
73+
emit(OPCODES[op])
74+
emit(av-1)
9875
elif op is IN:
9976
if flags & SRE_FLAG_IGNORECASE:
100-
append(OPCODES[OP_IGNORE[op]])
77+
emit(OPCODES[OP_IGNORE[op]])
10178
def fixup(literal, flags=flags):
10279
return _sre.getlower(ord(literal), flags)
10380
else:
104-
append(OPCODES[op])
81+
emit(OPCODES[op])
10582
fixup = ord
106-
skip = len(code); append(0)
83+
skip = len(code); emit(0)
10784
for op, av in av:
108-
append(OPCODES[op])
85+
emit(OPCODES[op])
10986
if op is NEGATE:
11087
pass
11188
elif op is LITERAL:
112-
append(fixup(av))
89+
emit(fixup(av))
11390
elif op is RANGE:
114-
append(fixup(av[0]))
115-
append(fixup(av[1]))
91+
emit(fixup(av[0]))
92+
emit(fixup(av[1]))
11693
elif op is CATEGORY:
11794
if flags & SRE_FLAG_LOCALE:
118-
append(CH_LOCALE[CHCODES[av]])
95+
emit(CH_LOCALE[CHCODES[av]])
11996
elif flags & SRE_FLAG_UNICODE:
120-
append(CH_UNICODE[CHCODES[av]])
97+
emit(CH_UNICODE[CHCODES[av]])
12198
else:
122-
append(CHCODES[av])
99+
emit(CHCODES[av])
123100
else:
124-
raise ValueError, "unsupported set operator"
125-
append(OPCODES[FAILURE])
101+
raise error, "internal: unsupported set operator"
102+
emit(OPCODES[FAILURE])
126103
code[skip] = len(code) - skip
127104
elif op in (LITERAL, NOT_LITERAL):
128105
if flags & SRE_FLAG_IGNORECASE:
129-
append(OPCODES[OP_IGNORE[op]])
106+
emit(OPCODES[OP_IGNORE[op]])
130107
else:
131-
append(OPCODES[op])
132-
append(ord(av))
108+
emit(OPCODES[op])
109+
emit(ord(av))
133110
elif op is MARK:
134-
append(OPCODES[op])
135-
append(av)
111+
emit(OPCODES[op])
112+
emit(av)
136113
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
137114
if flags & SRE_FLAG_TEMPLATE:
138-
append(OPCODES[REPEAT])
139-
skip = len(code); append(0)
140-
append(av[0])
141-
append(av[1])
115+
emit(OPCODES[REPEAT])
116+
skip = len(code); emit(0)
117+
emit(av[0])
118+
emit(av[1])
142119
_compile(code, av[2], flags)
143-
append(OPCODES[SUCCESS])
120+
emit(OPCODES[SUCCESS])
144121
code[skip] = len(code) - skip
145122
else:
146123
lo, hi = av[2].getwidth()
@@ -149,54 +126,50 @@ def fixup(literal, flags=flags):
149126
if 0 and lo == hi == 1 and op is MAX_REPEAT:
150127
# FIXME: <fl> need a better way to figure out when
151128
# it's safe to use this one (in the parser, probably)
152-
append(OPCODES[MAX_REPEAT_ONE])
153-
skip = len(code); append(0)
154-
append(av[0])
155-
append(av[1])
129+
emit(OPCODES[MAX_REPEAT_ONE])
130+
skip = len(code); emit(0)
131+
emit(av[0])
132+
emit(av[1])
156133
_compile(code, av[2], flags)
157-
append(OPCODES[SUCCESS])
134+
emit(OPCODES[SUCCESS])
158135
code[skip] = len(code) - skip
159136
else:
160-
append(OPCODES[op])
161-
skip = len(code); append(0)
162-
append(av[0])
163-
append(av[1])
137+
emit(OPCODES[op])
138+
skip = len(code); emit(0)
139+
emit(av[0])
140+
emit(av[1])
164141
_compile(code, av[2], flags)
165-
append(OPCODES[SUCCESS])
142+
emit(OPCODES[SUCCESS])
166143
code[skip] = len(code) - skip
167144
elif op is SUBPATTERN:
168145
group = av[0]
169146
if group:
170-
append(OPCODES[MARK])
171-
append((group-1)*2)
147+
emit(OPCODES[MARK])
148+
emit((group-1)*2)
172149
_compile(code, av[1], flags)
173150
if group:
174-
append(OPCODES[MARK])
175-
append((group-1)*2+1)
151+
emit(OPCODES[MARK])
152+
emit((group-1)*2+1)
176153
else:
177154
raise ValueError, ("unsupported operand type", op)
178155

179156
def compile(p, flags=0):
180-
# convert pattern list to internal format
157+
# internal: convert pattern list to internal format
181158
if type(p) in (type(""), type(u"")):
182159
import sre_parse
183160
pattern = p
184161
p = sre_parse.parse(p)
185162
else:
186163
pattern = None
187164
flags = p.pattern.flags | flags
188-
code = Code()
165+
code = []
189166
_compile(code, p.data, flags)
190167
code.append(OPCODES[SUCCESS])
191-
data = code.todata()
192-
if 0: # debugging
193-
print
194-
print "-" * 68
195-
import sre_disasm
196-
sre_disasm.disasm(data)
197-
print "-" * 68
168+
# FIXME: <fl> get rid of this limitation
169+
assert p.pattern.groups <= 100,\
170+
"sorry, but this version only supports 100 named groups"
198171
return _sre.compile(
199172
pattern, flags,
200-
data,
173+
array.array(WORDSIZE, code).tostring(),
201174
p.pattern.groups-1, p.pattern.groupdict
202175
)

0 commit comments

Comments
 (0)