Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6f01398

Browse files
author
Fredrik Lundh
committed
- added lookbehind support (?<=pattern), (?<!pattern).
  the pattern must have a fixed width.
- got rid of array-module dependencies; the match program is now stored inside the pattern object, rather than in an extra string buffer.
- cleaned up a variety of potential leaks, api abuses, and other minor issues in the engine module.
- use mal's new isalnum macro, rather than my own workaround.
- untabified test_sre.py. seems like I removed a couple of trailing spaces in the process...
1 parent 40c4868 commit 6f01398

5 files changed

Lines changed: 137 additions & 103 deletions

File tree

Lib/sre_compile.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,10 @@
1010
# other compatibility work.
1111
#
1212

13-
import array
1413
import _sre
1514

1615
from sre_constants import *
1716

18-
# find an array type code that matches the engine's code size
19-
for WORDSIZE in "Hil":
20-
if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
21-
break
22-
else:
23-
raise RuntimeError, "cannot find a useable array type"
24-
2517
MAXCODE = 65535
2618

2719
def _charset(charset, fixup):
@@ -170,7 +162,20 @@ def fixup(literal, flags=flags):
170162
emit((group-1)*2+1)
171163
elif op in (SUCCESS, FAILURE):
172164
emit(OPCODES[op])
173-
elif op in (ASSERT, ASSERT_NOT, CALL):
165+
elif op in (ASSERT, ASSERT_NOT):
166+
emit(OPCODES[op])
167+
skip = len(code); emit(0)
168+
if av[0] >= 0:
169+
emit(0) # look ahead
170+
else:
171+
lo, hi = av[1].getwidth()
172+
if lo != hi:
173+
raise error, "look-behind requires fixed-width pattern"
174+
emit(lo) # look behind
175+
_compile(code, av[1], flags)
176+
emit(OPCODES[SUCCESS])
177+
code[skip] = len(code) - skip
178+
elif op is CALL:
174179
emit(OPCODES[op])
175180
skip = len(code); emit(0)
176181
_compile(code, av, flags)
@@ -305,7 +310,7 @@ def compile(p, flags=0):
305310
indexgroup[i] = k
306311

307312
return _sre.compile(
308-
pattern, flags,
309-
array.array(WORDSIZE, code).tostring(),
310-
p.pattern.groups-1, groupindex, indexgroup
313+
pattern, flags, code,
314+
p.pattern.groups-1,
315+
groupindex, indexgroup
311316
)

Lib/sre_parse.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,15 @@ def _parse(source, state):
482482
if source.next is None or source.next == ")":
483483
break
484484
source.get()
485-
elif source.next in ("=", "!"):
485+
elif source.next in ("=", "!", "<"):
486486
# lookahead assertions
487487
char = source.get()
488+
dir = 1
489+
if char == "<":
490+
if source.next not in ("=", "!"):
491+
raise error, "syntax error"
492+
dir = -1 # lookbehind
493+
char = source.get()
488494
b = []
489495
while 1:
490496
p = _parse(source, state)
@@ -493,9 +499,9 @@ def _parse(source, state):
493499
b.append(p)
494500
p = _branch(state, b)
495501
if char == "=":
496-
subpattern.append((ASSERT, p))
502+
subpattern.append((ASSERT, (dir, p)))
497503
else:
498-
subpattern.append((ASSERT_NOT, p))
504+
subpattern.append((ASSERT_NOT, (dir, p)))
499505
break
500506
elif source.match("|"):
501507
b.append(p)

Lib/test/test_sre.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,20 @@
3535

3636
try:
3737
assert sre.sub("(?i)b+", "x", "bbbb BBBB") == 'x x'
38-
38+
3939
def bump_num(matchobj):
4040
int_value = int(matchobj.group(0))
4141
return str(int_value + 1)
4242

4343
assert sre.sub(r'\d+', bump_num, '08.2 -2 23x99y') == '9.3 -3 24x100y'
4444
assert sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3) == '9.3 -3 23x99y'
45-
45+
4646
assert sre.sub('.', lambda m: r"\n", 'x') == '\\n'
4747
assert sre.sub('.', r"\n", 'x') == '\n'
4848

4949
s = r"\1\1"
5050
assert sre.sub('(.)', s, 'x') == 'xx'
51-
assert sre.sub('(.)', sre.escape(s), 'x') == s
51+
assert sre.sub('(.)', sre.escape(s), 'x') == s
5252
assert sre.sub('(.)', lambda m: s, 'x') == s
5353

5454
assert sre.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
@@ -144,7 +144,7 @@ def bump_num(matchobj):
144144

145145
if verbose:
146146
print 'Running tests on sre.split'
147-
147+
148148
try:
149149
assert sre.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c']
150150
assert sre.split(":*", ":a:b::c") == ['', 'a', 'b', 'c']
@@ -164,7 +164,7 @@ def bump_num(matchobj):
164164
assert sre.split(':', 'a:b:c:d', 2) == ['a', 'b', 'c:d']
165165

166166
assert sre.split("(:)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c']
167-
assert sre.split("(:*)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c']
167+
assert sre.split("(:*)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c']
168168
except AssertionError:
169169
raise TestFailed, "qualified sre.split"
170170

@@ -186,29 +186,29 @@ def bump_num(matchobj):
186186

187187
try:
188188
# No groups at all
189-
m = sre.match('a', 'a') ; assert m.groups() == ()
189+
m = sre.match('a', 'a') ; assert m.groups() == ()
190190
# A single group
191-
m = sre.match('(a)', 'a') ; assert m.groups() == ('a',)
191+
m = sre.match('(a)', 'a') ; assert m.groups() == ('a',)
192192

193193
pat = sre.compile('((a)|(b))(c)?')
194-
assert pat.match('a').groups() == ('a', 'a', None, None)
195-
assert pat.match('b').groups() == ('b', None, 'b', None)
196-
assert pat.match('ac').groups() == ('a', 'a', None, 'c')
197-
assert pat.match('bc').groups() == ('b', None, 'b', 'c')
198-
assert pat.match('bc').groups("") == ('b', "", 'b', 'c')
194+
assert pat.match('a').groups() == ('a', 'a', None, None)
195+
assert pat.match('b').groups() == ('b', None, 'b', None)
196+
assert pat.match('ac').groups() == ('a', 'a', None, 'c')
197+
assert pat.match('bc').groups() == ('b', None, 'b', 'c')
198+
assert pat.match('bc').groups("") == ('b', "", 'b', 'c')
199199
except AssertionError:
200200
raise TestFailed, "match .groups() method"
201201

202202
try:
203203
# A single group
204-
m = sre.match('(a)', 'a')
205-
assert m.group(0) == 'a' ; assert m.group(0) == 'a'
204+
m = sre.match('(a)', 'a')
205+
assert m.group(0) == 'a' ; assert m.group(0) == 'a'
206206
assert m.group(1) == 'a' ; assert m.group(1, 1) == ('a', 'a')
207207

208208
pat = sre.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
209-
assert pat.match('a').group(1, 2, 3) == ('a', None, None)
210-
assert pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None)
211-
assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c')
209+
assert pat.match('a').group(1, 2, 3) == ('a', None, None)
210+
assert pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None)
211+
assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c')
212212
except AssertionError:
213213
raise TestFailed, "match .group() method"
214214

@@ -252,10 +252,10 @@ def bump_num(matchobj):
252252
assert sre.I == sre.IGNORECASE
253253
assert sre.L == sre.LOCALE
254254
assert sre.M == sre.MULTILINE
255-
assert sre.S == sre.DOTALL
256-
assert sre.X == sre.VERBOSE
257-
assert sre.T == sre.TEMPLATE
258-
assert sre.U == sre.UNICODE
255+
assert sre.S == sre.DOTALL
256+
assert sre.X == sre.VERBOSE
257+
assert sre.T == sre.TEMPLATE
258+
assert sre.U == sre.UNICODE
259259
except AssertionError:
260260
raise TestFailed, 're module constants'
261261

@@ -272,23 +272,23 @@ def bump_num(matchobj):
272272
else:
273273
# To save time, only run the first and last 10 tests
274274
#tests = tests[:10] + tests[-10:]
275-
pass
275+
pass
276276

277277
for t in tests:
278278
sys.stdout.flush()
279279
pattern=s=outcome=repl=expected=None
280280
if len(t)==5:
281281
pattern, s, outcome, repl, expected = t
282282
elif len(t)==3:
283-
pattern, s, outcome = t
283+
pattern, s, outcome = t
284284
else:
285285
raise ValueError, ('Test tuples should have 3 or 5 fields',t)
286286

287287
try:
288288
obj=sre.compile(pattern)
289289
except sre.error:
290290
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
291-
else:
291+
else:
292292
print '=== Syntax error:', t
293293
except KeyboardInterrupt: raise KeyboardInterrupt
294294
except:
@@ -356,7 +356,7 @@ def bump_num(matchobj):
356356
# of the match and see if it still succeeds. \B will
357357
# break (because it won't match at the end or start of a
358358
# string), so we'll ignore patterns that feature it.
359-
359+
360360
if pattern[:2]!='\\B' and pattern[-2:]!='\\B':
361361
obj=sre.compile(pattern)
362362
result=obj.search(s, result.start(0), result.end(0)+1)

0 commit comments

Comments
 (0)