Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7627c0d

Browse files
committed
Added Fredrik Lundh's sre module and its supporting cast.
NOTE: THIS IS VERY ROUGH ALPHA CODE!
1 parent 7a5b796 commit 7627c0d

4 files changed

Lines changed: 856 additions & 0 deletions

File tree

Lib/sre.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# -*- Mode: Python; tab-width: 4 -*-
2+
#
3+
# Secret Labs' Regular Expression Engine
4+
# $Id$
5+
#
6+
# re-compatible interface for the sre matching engine
7+
#
8+
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
9+
#
10+
# This code can only be used for 1.6 alpha testing. All other use
11+
# require explicit permission from Secret Labs AB.
12+
#
13+
# Portions of this engine have been developed in cooperation with
14+
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
15+
# other compatibility work.
16+
#
17+
18+
"""
19+
this is a long string
20+
"""
21+
22+
import sre_compile
23+
24+
# --------------------------------------------------------------------
25+
# public interface
26+
27+
def compile(pattern, flags=0):
    """Compile *pattern* with the sre compiler.

    *flags* is a flag bitmask; it is translated by _fixflags() into the
    flag sequence that sre_compile.compile() receives as its second
    argument.  Returns whatever sre_compile.compile() returns.
    """
    flag_sequence = _fixflags(flags)
    return sre_compile.compile(pattern, flag_sequence)
29+
30+
def match(pattern, string, flags=0):
    """Compile *pattern* and return the result of its match(string) call.

    pattern -- pattern to compile (passed straight to compile())
    string  -- subject handed to the compiled object's match() method
    flags   -- flag bitmask, in the same form compile() accepts
    """
    # compile() already runs the bitmask through _fixflags().  Converting
    # it here as well would convert twice: the second conversion would
    # receive a sequence instead of a bitmask.
    return compile(pattern, flags).match(string)
32+
33+
def search(pattern, string, flags=0):
    """Compile *pattern* and return the result of its search(string) call.

    pattern -- pattern to compile (passed straight to compile())
    string  -- subject handed to the compiled object's search() method
    flags   -- flag bitmask, in the same form compile() accepts
    """
    # compile() already runs the bitmask through _fixflags(), which is
    # also where unsupported flag values are rejected.  Converting the
    # flags here too would convert twice: the second conversion would
    # receive a sequence instead of a bitmask.
    return compile(pattern, flags).search(string)
36+
37+
# FIXME: etc
38+
39+
# --------------------------------------------------------------------
40+
# helpers
41+
42+
def _fixflags(flags):
43+
# convert flag bitmask to sequence
44+
assert flags == 0
45+
return ()
46+

Lib/sre_compile.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
#
2+
# Secret Labs' Regular Expression Engine
3+
# $Id$
4+
#
5+
# convert template to internal format
6+
#
7+
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
8+
#
9+
# This code can only be used for 1.6 alpha testing. All other use
10+
# require explicit permission from Secret Labs AB.
11+
#
12+
# Portions of this engine have been developed in cooperation with
13+
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
14+
# other compatibility work.
15+
#
16+
17+
# FIXME: <fl> formalize (objectify?) and document the compiler code
18+
# format, so that other frontends can use this compiler
19+
20+
import array, string, sys
21+
22+
import _sre
23+
24+
from sre_constants import *
25+
26+
# find an array type code that matches the engine's code size
27+
# Probe the candidate array type codes and keep the first one whose item
# size equals the code size reported by the engine.  WORDSIZE stays bound
# to that type code after the loop.
for WORDSIZE in "BHil":
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    # none of the candidates has the engine's code size
    raise RuntimeError("cannot find a useable array type")
32+
33+
# FIXME: <fl> should move some optimizations from the parser to here!
34+
35+
class Code:
    """Growable sequence of code words.

    The words are kept in the plain list self.data; the sequence
    methods below simply delegate to that list.
    """

    def __init__(self):
        """Start with an empty word list."""
        self.data = []

    def __len__(self):
        """Return the number of words stored so far."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the word stored at *index*."""
        return self.data[index]

    def __setitem__(self, index, code):
        """Overwrite the word stored at *index* with *code*."""
        self.data[index] = code

    def append(self, code):
        """Add *code* at the end of the word list."""
        self.data.append(code)

    def todata(self):
        """Return the words packed into a string.

        The list is packed as an array whose type code is the
        module-level WORDSIZE.
        """
        packed = array.array(WORDSIZE, self.data)
        return packed.tostring()
49+
50+
def _lower(literal):
51+
# return _sre._lower(literal) # FIXME
52+
return string.lower(literal)
53+
54+
def _compile(code, pattern, flags):
    """Append the code words for *pattern* to *code*.

    code    -- target object; must provide append(), len() and item
               assignment (see the Code class)
    pattern -- sequence of (op, av) pairs
    flags   -- container of flag characters; only "s" and "i" are
               tested here

    Blocks that need a length prefix reserve one word (a "slot") first
    and patch it afterwards with the distance from the slot to the
    current end of the code.

    Raises ValueError for an operator (or set operator) that is not
    handled here, and SyntaxError when the body of a repeat reports a
    minimum width of zero.
    """
    emit = code.append
    for op, av in pattern:
        if op is ANY:
            if "s" in flags:
                emit(CODES[op])  # any character at all!
            else:
                # without "s": anything except character code 10
                emit(CODES[NOT_LITERAL])
                emit(10)
        elif op in (SUCCESS, FAILURE):
            emit(CODES[op])
        elif op is AT:
            emit(CODES[op])
            emit(POSITIONS[av])
        elif op is BRANCH:
            emit(CODES[op])
            jump_slots = []
            for alternative in av[1]:
                skip_slot = len(code)
                emit(0)
                _compile(code, alternative, flags)
                emit(CODES[JUMP])
                # the jump target is only known once all alternatives
                # have been emitted; remember the slot for later
                jump_slots.append(len(code))
                emit(0)
                code[skip_slot] = len(code) - skip_slot
            emit(0)  # end of branch
            for slot in jump_slots:
                code[slot] = len(code) - slot
        elif op is CALL:
            emit(CODES[op])
            skip_slot = len(code)
            emit(0)
            _compile(code, av, flags)
            emit(CODES[SUCCESS])
            code[skip_slot] = len(code) - skip_slot
        elif op is CATEGORY:  # not used by current parser
            emit(CODES[op])
            emit(CATEGORIES[av])
        elif op is GROUP:
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
            else:
                emit(CODES[op])
            emit(av)
        elif op is IN:
            # choose the opcode and the literal -> code word conversion
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])

                def fixup(literal):
                    return ord(_lower(literal))
            else:
                emit(CODES[op])
                fixup = ord
            skip_slot = len(code)
            emit(0)
            for set_op, set_av in av:
                # the opcode is emitted before the operator is checked
                emit(CODES[set_op])
                if set_op is LITERAL:
                    emit(fixup(set_av))
                elif set_op is RANGE:
                    emit(fixup(set_av[0]))
                    emit(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    emit(CATEGORIES[set_av])
                elif set_op is not NEGATE:
                    # NEGATE takes no argument; anything else is unknown
                    raise ValueError("unsupported set operator")
            emit(CODES[FAILURE])
            code[skip_slot] = len(code) - skip_slot
        elif op in (LITERAL, NOT_LITERAL):
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
                emit(ord(_lower(av)))
            else:
                emit(CODES[op])
                emit(ord(av))
        elif op is MARK:
            emit(CODES[op])
            emit(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            lo, hi = av[2].getwidth()
            if lo == 0:
                raise SyntaxError("cannot repeat zero-width items")
            # a MAX_REPEAT whose body is exactly one item wide gets the
            # dedicated MAX_REPEAT_ONE opcode, terminated by SUCCESS
            single_item = lo == hi == 1 and op is MAX_REPEAT
            if single_item:
                emit(CODES[MAX_REPEAT_ONE])
            else:
                emit(CODES[op])
            skip_slot = len(code)
            emit(0)
            emit(av[0])
            emit(av[1])
            _compile(code, av[2], flags)
            if single_item:
                emit(CODES[SUCCESS])
            elif op is MIN_REPEAT:
                emit(CODES[MIN_UNTIL])
            else:
                # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
                emit(CODES[MAX_UNTIL])
            code[skip_slot] = len(code) - skip_slot
        elif op is SUBPATTERN:
            # Only the body (av[1]) is compiled; the group number in
            # av[0] is not used and no MARK codes are emitted around
            # the body (that code is disabled).
            _compile(code, av[1], flags)
        else:
            raise ValueError("unsupported operand type", op)
164+
165+
def compile(p, flags=()):
    """Convert a pattern to the engine's internal format.

    p     -- either a pattern string, which is parsed with
             sre_parse.parse() first, or an already parsed pattern
             object providing .data and .pattern (with .flags, .groups
             and .groupdict)
    flags -- NOTE(review): never read in this function; the flags are
             taken from p.pattern.flags instead -- confirm this is
             intended

    Returns the result of _sre.compile().  The original pattern string
    is passed along when p was a string, None otherwise.
    """
    pattern = None
    if type(p) is type(""):
        import sre_parse
        pattern = p
        p = sre_parse.parse(pattern)

    code = Code()
    _compile(code, p.data, p.pattern.flags)
    code.append(CODES[SUCCESS])  # terminate the program
    data = code.todata()

    if 0:  # debugging: change to 1 to dump the generated code
        print("")
        print("-" * 68)
        import sre_disasm
        sre_disasm.disasm(data)
        print("-" * 68)

    state = p.pattern
    return _sre.compile(pattern, data, state.groups - 1, state.groupdict)

Lib/sre_constants.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#
2+
# Secret Labs' Regular Expression Engine
3+
# $Id$
4+
#
5+
# various symbols used by the regular expression engine.
6+
# run this script to update the _sre include files!
7+
#
8+
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
9+
#
10+
# This code can only be used for 1.6 alpha testing. All other use
11+
# require explicit permission from Secret Labs AB.
12+
#
13+
# Portions of this engine have been developed in cooperation with
14+
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
15+
# other compatibility work.
16+
#
17+
18+
# operators
19+
20+
# Each symbol is a plain string naming an operator, position or
# category.  sre_compile compares many of them by identity ("op is
# ANY"), so always refer to these names, not to equal string literals.

# -- operators -------------------------------------------------------

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ASSERT = "assert"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
GROUP = "group"
GROUP_IGNORE = "group_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
SUBPATTERN = "subpattern"

# -- positions (arguments of AT) -------------------------------------

AT_BEGINNING = "at_beginning"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"

# -- categories (arguments of CATEGORY) ------------------------------

CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
63+
64+
def _makecodes(names):
    """Return a dictionary mapping each name to its index in *names*.

    Building the table inside a private function keeps the loop
    temporaries out of the module namespace, so "from sre_constants
    import *" exports only the symbols themselves.
    """
    table = {}
    for index in range(len(names)):
        table[names[index]] = index
    return table

# Opcode table: operator name -> numeric opcode.  The opcode of an
# operator is its position in the list below, so the order matters (the
# header written by the __main__ section of this file is derived from
# this table).
CODES = _makecodes([

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY,
    ASSERT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    GROUP, GROUP_IGNORE,
    IN, IN_IGNORE,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_REPEAT, MAX_UNTIL,
    MAX_REPEAT_ONE,
    MIN_REPEAT, MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,

])
97+
98+
# replacement operations for "ignore case" mode
99+
# Operator -> replacement operator used in "ignore case" mode.
MAP_IGNORE = {
    GROUP: GROUP_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
}

# Position symbol -> code word emitted after an AT opcode; each code is
# the character code of a mnemonic letter.
POSITIONS = {
    AT_BEGINNING: ord("a"),
    AT_BOUNDARY: ord("b"),
    AT_NON_BOUNDARY: ord("B"),
    AT_END: ord("z"),
}

# Category symbol -> code word emitted after a CATEGORY opcode; a
# lowercase letter stands for the category, the uppercase letter for
# its NOT_ counterpart.
CATEGORIES = {
    CATEGORY_DIGIT: ord("d"),
    CATEGORY_NOT_DIGIT: ord("D"),
    CATEGORY_SPACE: ord("s"),
    CATEGORY_NOT_SPACE: ord("S"),
    CATEGORY_WORD: ord("w"),
    CATEGORY_NOT_WORD: ord("W"),
}
121+
122+
if __name__ == "__main__":
    # Running this file as a script writes sre_constants.h to the
    # current directory: one "#define SRE_OP_<NAME> <value>" line per
    # entry of CODES, in ascending opcode order.

    # Sort on (value, name) pairs so no comparison function is needed.
    ordered = []
    for name, value in CODES.items():
        ordered.append((value, name))
    ordered.sort()

    f = open("sre_constants.h", "w")
    try:
        f.write("/* generated by sre_constants.py */\n")
        for value, name in ordered:
            f.write("#define SRE_OP_" + name.upper() + " " + str(value) + "\n")
    finally:
        # release the file handle even if one of the writes fails
        f.close()
    print("done")

0 commit comments

Comments
 (0)