Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b1aa195

Browse files
committed
Fredrik Lundh: here's the 96.6% version of SRE
1 parent 0292d78 commit b1aa195

6 files changed

Lines changed: 743 additions & 303 deletions

File tree

Lib/sre.py

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- Mode: Python; tab-width: 4 -*-
21
#
32
# Secret Labs' Regular Expression Engine
43
# $Id$
@@ -7,39 +6,127 @@
76
#
87
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
98
#
10-
# This code can only be used for 1.6 alpha testing. All other use
11-
# require explicit permission from Secret Labs AB.
12-
#
139
# Portions of this engine have been developed in cooperation with
1410
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
1511
# other compatibility work.
1612
#
1713

18-
"""
19-
this is a long string
20-
"""
21-
2214
import sre_compile
2315

16+
# flags
17+
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
18+
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
19+
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
20+
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
21+
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
22+
2423
# --------------------------------------------------------------------
2524
# public interface
2625

27-
def compile(pattern, flags=0):
28-
return sre_compile.compile(pattern, _fixflags(flags))
26+
# FIXME: add docstrings
2927

3028
def match(pattern, string, flags=0):
    """Apply the match() method of the compiled form of *pattern* to *string*.

    *pattern* is compiled with *flags* through _compile(); a pattern that
    is not a string is used as-is.  The result of match() is returned
    unchanged.
    """
    compiled = _compile(pattern, flags)
    return compiled.match(string)
3230

3331
def search(pattern, string, flags=0):
    """Apply the search() method of the compiled form of *pattern* to *string*.

    *pattern* is compiled with *flags* through _compile(); a pattern that
    is not a string is used as-is.  The result of search() is returned
    unchanged.
    """
    compiled = _compile(pattern, flags)
    return compiled.search(string)
33+
34+
def sub(pattern, repl, string, count=0):
    """Return the result of the compiled pattern's sub(repl, string, count).

    *pattern* is compiled through _compile() with default flags; a
    pattern that is not a string is used as-is.
    """
    compiled = _compile(pattern)
    return compiled.sub(repl, string, count)
36+
37+
def subn(pattern, repl, string, count=0):
    """Return the result of the compiled pattern's subn(repl, string, count).

    *pattern* is compiled through _compile() with default flags; a
    pattern that is not a string is used as-is.
    """
    compiled = _compile(pattern)
    return compiled.subn(repl, string, count)
39+
40+
def split(pattern, string, maxsplit=0):
    """Return the result of the compiled pattern's split(string, maxsplit).

    *pattern* is compiled through _compile() with default flags; a
    pattern that is not a string is used as-is.
    """
    compiled = _compile(pattern)
    return compiled.split(string, maxsplit)
3542

36-
# FIXME: etc
43+
def findall(pattern, string, maxsplit=0):
    """Return the result of the compiled pattern's findall(string).

    *pattern* is compiled through _compile() with default flags; a
    pattern that is not a string is used as-is.

    *maxsplit* is accepted only so existing callers keep working; it is
    ignored.  findall has no notion of a split limit, and forwarding the
    value positionally would hand it to whatever the pattern object's
    findall() takes as its second argument.
    NOTE(review): confirm against the _sre pattern object's findall
    signature.
    """
    compiled = _compile(pattern)
    return compiled.findall(string)
45+
46+
def compile(pattern, flags=0):
    """Return the compiled form of *pattern* for the given *flags*.

    Public entry point for _compile(): string patterns are compiled (and
    cached) there, anything else is returned unchanged.
    """
    compiled = _compile(pattern, flags)
    return compiled
48+
49+
def escape(pattern):
    """Return *pattern* with every non-alphanumeric character escaped.

    ASCII letters and digits are copied through unchanged.  A NUL
    character becomes a backslash followed by the digits 000; any other
    character is prefixed with a single backslash.  The pieces are joined
    with an empty slice of *pattern*, so the result has the same string
    type as the input.
    """
    escaped = []
    for char in pattern:
        if "a" <= char <= "z" or "A" <= char <= "Z" or "0" <= char <= "9":
            escaped.append(char)
        elif char == "\000":
            escaped.append("\\000")
        else:
            escaped.append("\\" + char)
    return pattern[:0].join(escaped)
3759

3860
# --------------------------------------------------------------------
39-
# helpers
61+
# internals
62+
63+
_cache = {}
64+
_MAXCACHE = 100
65+
66+
def _compile(pattern, flags=0):
    # internal: compile pattern
    #
    # Anything that is not exactly a plain or unicode string is returned
    # unchanged.  Strings are looked up in _cache under
    # (type, pattern, flags); on a miss the pattern is compiled with
    # sre_compile.compile and stored.  When the cache already holds
    # _MAXCACHE entries it is emptied completely before the new entry is
    # added.
    kind = type(pattern)
    if kind is not type("") and kind is not type(u""):
        return pattern
    cache_key = (kind, pattern, flags)
    try:
        return _cache[cache_key]
    except KeyError:
        pass
    compiled = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[cache_key] = compiled
    return compiled
81+
82+
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    #
    # Delegates to _subn and keeps only the resulting string, dropping
    # the substitution count.
    result = _subn(pattern, template, string, count)
    return result[0]
85+
86+
def _expand(match, template):
    # internal: expand template
    #
    # Placeholder: the template is currently returned unchanged and the
    # match object is not consulted.
    return template # FIXME
4089

41-
def _fixflags(flags):
42-
# convert flag bitmask to sequence
43-
assert not flags
44-
return ()
90+
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # Walks the matches produced by pattern.cursor(string) from left to
    # right, copying the text between matches and substituting each match.
    #
    # template: a callable, invoked with each match object and expected
    #     to return the replacement text, or any other object, which is
    #     passed to _expand() together with the match.
    # count: maximum number of substitutions; 0 means no limit.
    #
    # Returns (new_string, number_of_substitutions).  The pieces are
    # joined with an empty slice of string, so the result has the same
    # string type as the input.
    if callable(template):
        # Call the caller's function itself.  Binding the builtin
        # callable() here would put True/False into the output instead
        # of the replacement text.
        replace = template
    else:
        # FIXME: prepare template
        def replace(match, template=template):
            return _expand(match, template)
    pieces = []
    append = pieces.append
    cursor = pattern.cursor(string)
    n = pos = 0
    while not count or n < count:
        m = cursor.search()
        if not m:
            break
        start = m.start()
        if start > pos:
            # unmatched text between the previous match and this one
            append(string[pos:start])
        append(replace(m))
        pos = m.end()
        n = n + 1
    if pos < len(string):
        # unmatched tail after the last substitution
        append(string[pos:])
    return string[:0].join(pieces), n
45115

116+
def _split(pattern, string, maxsplit=0):
    # internal: pattern.split implementation hook
    #
    # Walks the matches produced by pattern.cursor(string) and returns
    # the list of substrings found between them.
    #
    # maxsplit: maximum number of splits; 0 means no limit.
    #
    # The remainder after the last split is always appended, even when it
    # is empty, so a separator at the very end of the string yields a
    # trailing "" in the same way a separator at the very start yields a
    # leading "".
    n = pos = 0
    fields = []
    append = fields.append
    cursor = pattern.cursor(string)
    while not maxsplit or n < maxsplit:
        m = cursor.search()
        if not m:
            break
        append(string[pos:m.start()])
        pos = m.end()
        n = n + 1
    append(string[pos:])
    return fields

Lib/sre_compile.py

Lines changed: 70 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
# other compatibility work.
1515
#
1616

17-
# FIXME: <fl> formalize (objectify?) and document the compiler code
18-
# format, so that other frontends can use this compiler
19-
2017
import array, string, sys
2118

2219
import _sre
@@ -45,64 +42,70 @@ def append(self, code):
4542
self.data.append(code)
4643
def todata(self):
    # Pack the code list held in self.data into an array of item type
    # WORDSIZE and return that array's string form.
    # print self.data
    try:
        return array.array(WORDSIZE, self.data).tostring()
    except OverflowError:
        # Debugging aid: dump the code list before re-raising.
        # (presumably raised when a value does not fit the array's item
        # type -- confirm against the array module)
        print self.data
        raise
5350

54-
def _compile(code, pattern, flags):
51+
def _compile(code, pattern, flags, level=0):
5552
append = code.append
5653
for op, av in pattern:
5754
if op is ANY:
58-
if "s" in flags:
59-
append(CODES[op]) # any character at all!
55+
if flags & SRE_FLAG_DOTALL:
56+
append(OPCODES[op]) # any character at all!
6057
else:
61-
append(CODES[NOT_LITERAL])
62-
append(10)
58+
append(OPCODES[CATEGORY])
59+
append(CHCODES[CATEGORY_NOT_LINEBREAK])
6360
elif op in (SUCCESS, FAILURE):
64-
append(CODES[op])
61+
append(OPCODES[op])
6562
elif op is AT:
66-
append(CODES[op])
67-
append(POSITIONS[av])
63+
append(OPCODES[op])
64+
if flags & SRE_FLAG_MULTILINE:
65+
append(ATCODES[AT_MULTILINE[av]])
66+
else:
67+
append(ATCODES[av])
6868
elif op is BRANCH:
69-
append(CODES[op])
69+
append(OPCODES[op])
7070
tail = []
7171
for av in av[1]:
7272
skip = len(code); append(0)
73-
_compile(code, av, flags)
74-
append(CODES[JUMP])
73+
_compile(code, av, flags, level)
74+
append(OPCODES[JUMP])
7575
tail.append(len(code)); append(0)
7676
code[skip] = len(code) - skip
7777
append(0) # end of branch
7878
for tail in tail:
7979
code[tail] = len(code) - tail
8080
elif op is CALL:
81-
append(CODES[op])
81+
append(OPCODES[op])
8282
skip = len(code); append(0)
83-
_compile(code, av, flags)
84-
append(CODES[SUCCESS])
83+
_compile(code, av, flags, level+1)
84+
append(OPCODES[SUCCESS])
8585
code[skip] = len(code) - skip
8686
elif op is CATEGORY: # not used by current parser
87-
append(CODES[op])
88-
append(CATEGORIES[av])
87+
append(OPCODES[op])
88+
if flags & SRE_FLAG_LOCALE:
89+
append(CH_LOCALE[CHCODES[av]])
90+
else:
91+
append(CHCODES[av])
8992
elif op is GROUP:
90-
if "i" in flags:
91-
append(CODES[MAP_IGNORE[op]])
93+
if flags & SRE_FLAG_IGNORECASE:
94+
append(OPCODES[OP_IGNORE[op]])
9295
else:
93-
append(CODES[op])
94-
append(av)
96+
append(OPCODES[op])
97+
append(av-1)
9598
elif op is IN:
96-
if "i" in flags:
97-
append(CODES[MAP_IGNORE[op]])
99+
if flags & SRE_FLAG_IGNORECASE:
100+
append(OPCODES[OP_IGNORE[op]])
98101
def fixup(literal):
99-
return ord(_lower(literal))
102+
return ord(literal.lower())
100103
else:
101-
append(CODES[op])
104+
append(OPCODES[op])
102105
fixup = ord
103106
skip = len(code); append(0)
104107
for op, av in av:
105-
append(CODES[op])
108+
append(OPCODES[op])
106109
if op is NEGATE:
107110
pass
108111
elif op is LITERAL:
@@ -111,77 +114,80 @@ def fixup(literal):
111114
append(fixup(av[0]))
112115
append(fixup(av[1]))
113116
elif op is CATEGORY:
114-
append(CATEGORIES[av])
117+
if flags & SRE_FLAG_LOCALE:
118+
append(CH_LOCALE[CHCODES[av]])
119+
else:
120+
append(CHCODES[av])
115121
else:
116122
raise ValueError, "unsupported set operator"
117-
append(CODES[FAILURE])
123+
append(OPCODES[FAILURE])
118124
code[skip] = len(code) - skip
119125
elif op in (LITERAL, NOT_LITERAL):
120-
if "i" in flags:
121-
append(CODES[MAP_IGNORE[op]])
122-
append(ord(_lower(av)))
126+
if flags & SRE_FLAG_IGNORECASE:
127+
append(OPCODES[OP_IGNORE[op]])
128+
append(ord(av.lower()))
123129
else:
124-
append(CODES[op])
130+
append(OPCODES[op])
125131
append(ord(av))
126132
elif op is MARK:
127-
append(CODES[op])
133+
append(OPCODES[op])
128134
append(av)
129135
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
130136
lo, hi = av[2].getwidth()
131137
if lo == 0:
132138
raise SyntaxError, "cannot repeat zero-width items"
133139
if lo == hi == 1 and op is MAX_REPEAT:
134-
append(CODES[MAX_REPEAT_ONE])
140+
append(OPCODES[MAX_REPEAT_ONE])
135141
skip = len(code); append(0)
136142
append(av[0])
137143
append(av[1])
138-
_compile(code, av[2], flags)
139-
append(CODES[SUCCESS])
144+
_compile(code, av[2], flags, level+1)
145+
append(OPCODES[SUCCESS])
140146
code[skip] = len(code) - skip
141147
else:
142-
append(CODES[op])
148+
append(OPCODES[op])
143149
skip = len(code); append(0)
144150
append(av[0])
145151
append(av[1])
146-
_compile(code, av[2], flags)
152+
_compile(code, av[2], flags, level+1)
147153
if op is MIN_REPEAT:
148-
append(CODES[MIN_UNTIL])
154+
append(OPCODES[MIN_UNTIL])
149155
else:
150-
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
151-
append(CODES[MAX_UNTIL])
156+
append(OPCODES[MAX_UNTIL])
152157
code[skip] = len(code) - skip
153158
elif op is SUBPATTERN:
154-
## group = av[0]
155-
## if group:
156-
## append(CODES[MARK])
157-
## append((group-1)*2)
158-
_compile(code, av[1], flags)
159-
## if group:
160-
## append(CODES[MARK])
161-
## append((group-1)*2+1)
159+
group = av[0]
160+
if group:
161+
append(OPCODES[MARK])
162+
append((group-1)*2)
163+
_compile(code, av[1], flags, level+1)
164+
if group:
165+
append(OPCODES[MARK])
166+
append((group-1)*2+1)
162167
else:
163168
raise ValueError, ("unsupported operand type", op)
164169

165-
def compile(p, flags=()):
170+
def compile(p, flags=0):
    # convert pattern list to internal format
    #
    # p: either a pattern string (plain or unicode), which is parsed with
    #    sre_parse.parse, or an object that is used as the parsed pattern
    #    directly (it must provide .data and .pattern).
    # flags: bitmask of SRE_FLAG_* values; combined with the flags
    #    recorded on the parsed pattern.
    #
    # Returns whatever _sre.compile builds from the generated code.
    if type(p) in (type(""), type(u"")):
        import sre_parse
        pattern = p
        p = sre_parse.parse(p)
    else:
        # no source string is available for an already-parsed pattern
        pattern = None
    # merge the caller's flags with those stored on the parsed pattern
    flags = p.pattern.flags | flags
    code = Code()
    _compile(code, p.data, flags)
    # terminate the top-level program
    code.append(OPCODES[SUCCESS])
    data = code.todata()
    if 0: # debugging
        print
        print "-" * 68
        import sre_disasm
        sre_disasm.disasm(data)
        print "-" * 68
    return _sre.compile(
        pattern, flags,
        data,
        p.pattern.groups-1, p.pattern.groupdict
        )

0 commit comments

Comments
 (0)