1111# other compatibility work.
1212#
1313
14- import array , string , sys
15-
14+ import array
1615import _sre
1716
1817from sre_constants import *
2423else :
2524 raise RuntimeError , "cannot find a useable array type"
2625
27- # FIXME: <fl> should move some optimizations from the parser to here!
28-
29- class Code :
30- def __init__ (self ):
31- self .data = []
32- def __len__ (self ):
33- return len (self .data )
34- def __getitem__ (self , index ):
35- return self .data [index ]
36- def __setitem__ (self , index , code ):
37- self .data [index ] = code
38- def append (self , code ):
39- self .data .append (code )
40- def todata (self ):
41- # print self.data
42- try :
43- return array .array (WORDSIZE , self .data ).tostring ()
44- except OverflowError :
45- print self .data
46- raise
47-
4826def _compile (code , pattern , flags ):
49- append = code .append
27+ emit = code .append
5028 for op , av in pattern :
5129 if op is ANY :
5230 if flags & SRE_FLAG_DOTALL :
53- append (OPCODES [op ]) # any character at all!
31+ emit (OPCODES [op ])
5432 else :
55- append (OPCODES [CATEGORY ])
56- append (CHCODES [CATEGORY_NOT_LINEBREAK ])
33+ emit (OPCODES [CATEGORY ])
34+ emit (CHCODES [CATEGORY_NOT_LINEBREAK ])
5735 elif op in (SUCCESS , FAILURE ):
58- append (OPCODES [op ])
36+ emit (OPCODES [op ])
5937 elif op is AT :
60- append (OPCODES [op ])
38+ emit (OPCODES [op ])
6139 if flags & SRE_FLAG_MULTILINE :
62- append (ATCODES [AT_MULTILINE [av ]])
40+ emit (ATCODES [AT_MULTILINE [av ]])
6341 else :
64- append (ATCODES [av ])
42+ emit (ATCODES [av ])
6543 elif op is BRANCH :
66- append (OPCODES [op ])
44+ emit (OPCODES [op ])
6745 tail = []
6846 for av in av [1 ]:
69- skip = len (code ); append (0 )
47+ skip = len (code ); emit (0 )
7048 _compile (code , av , flags )
71- ## append(OPCODES[SUCCESS])
72- append (OPCODES [JUMP ])
73- tail .append (len (code )); append (0 )
49+ emit (OPCODES [JUMP ])
50+ tail .append (len (code )); emit (0 )
7451 code [skip ] = len (code ) - skip
75- append (0 ) # end of branch
52+ emit (0 ) # end of branch
7653 for tail in tail :
7754 code [tail ] = len (code ) - tail
7855 elif op is CALL :
79- append (OPCODES [op ])
80- skip = len (code ); append (0 )
56+ emit (OPCODES [op ])
57+ skip = len (code ); emit (0 )
8158 _compile (code , av , flags )
82- append (OPCODES [SUCCESS ])
59+ emit (OPCODES [SUCCESS ])
8360 code [skip ] = len (code ) - skip
8461 elif op is CATEGORY :
85- append (OPCODES [op ])
62+ emit (OPCODES [op ])
8663 if flags & SRE_FLAG_LOCALE :
87- append (CH_LOCALE [CHCODES [av ]])
64+ emit (CH_LOCALE [CHCODES [av ]])
8865 elif flags & SRE_FLAG_UNICODE :
89- append (CH_UNICODE [CHCODES [av ]])
66+ emit (CH_UNICODE [CHCODES [av ]])
9067 else :
91- append (CHCODES [av ])
68+ emit (CHCODES [av ])
9269 elif op is GROUP :
9370 if flags & SRE_FLAG_IGNORECASE :
94- append (OPCODES [OP_IGNORE [op ]])
71+ emit (OPCODES [OP_IGNORE [op ]])
9572 else :
96- append (OPCODES [op ])
97- append (av - 1 )
73+ emit (OPCODES [op ])
74+ emit (av - 1 )
9875 elif op is IN :
9976 if flags & SRE_FLAG_IGNORECASE :
100- append (OPCODES [OP_IGNORE [op ]])
77+ emit (OPCODES [OP_IGNORE [op ]])
10178 def fixup (literal , flags = flags ):
10279 return _sre .getlower (ord (literal ), flags )
10380 else :
104- append (OPCODES [op ])
81+ emit (OPCODES [op ])
10582 fixup = ord
106- skip = len (code ); append (0 )
83+ skip = len (code ); emit (0 )
10784 for op , av in av :
108- append (OPCODES [op ])
85+ emit (OPCODES [op ])
10986 if op is NEGATE :
11087 pass
11188 elif op is LITERAL :
112- append (fixup (av ))
89+ emit (fixup (av ))
11390 elif op is RANGE :
114- append (fixup (av [0 ]))
115- append (fixup (av [1 ]))
91+ emit (fixup (av [0 ]))
92+ emit (fixup (av [1 ]))
11693 elif op is CATEGORY :
11794 if flags & SRE_FLAG_LOCALE :
118- append (CH_LOCALE [CHCODES [av ]])
95+ emit (CH_LOCALE [CHCODES [av ]])
11996 elif flags & SRE_FLAG_UNICODE :
120- append (CH_UNICODE [CHCODES [av ]])
97+ emit (CH_UNICODE [CHCODES [av ]])
12198 else :
122- append (CHCODES [av ])
99+ emit (CHCODES [av ])
123100 else :
124- raise ValueError , "unsupported set operator"
125- append (OPCODES [FAILURE ])
101+ raise error , "internal: unsupported set operator"
102+ emit (OPCODES [FAILURE ])
126103 code [skip ] = len (code ) - skip
127104 elif op in (LITERAL , NOT_LITERAL ):
128105 if flags & SRE_FLAG_IGNORECASE :
129- append (OPCODES [OP_IGNORE [op ]])
106+ emit (OPCODES [OP_IGNORE [op ]])
130107 else :
131- append (OPCODES [op ])
132- append (ord (av ))
108+ emit (OPCODES [op ])
109+ emit (ord (av ))
133110 elif op is MARK :
134- append (OPCODES [op ])
135- append (av )
111+ emit (OPCODES [op ])
112+ emit (av )
136113 elif op in (REPEAT , MIN_REPEAT , MAX_REPEAT ):
137114 if flags & SRE_FLAG_TEMPLATE :
138- append (OPCODES [REPEAT ])
139- skip = len (code ); append (0 )
140- append (av [0 ])
141- append (av [1 ])
115+ emit (OPCODES [REPEAT ])
116+ skip = len (code ); emit (0 )
117+ emit (av [0 ])
118+ emit (av [1 ])
142119 _compile (code , av [2 ], flags )
143- append (OPCODES [SUCCESS ])
120+ emit (OPCODES [SUCCESS ])
144121 code [skip ] = len (code ) - skip
145122 else :
146123 lo , hi = av [2 ].getwidth ()
@@ -149,54 +126,50 @@ def fixup(literal, flags=flags):
149126 if 0 and lo == hi == 1 and op is MAX_REPEAT :
150127 # FIXME: <fl> need a better way to figure out when
151128 # it's safe to use this one (in the parser, probably)
152- append (OPCODES [MAX_REPEAT_ONE ])
153- skip = len (code ); append (0 )
154- append (av [0 ])
155- append (av [1 ])
129+ emit (OPCODES [MAX_REPEAT_ONE ])
130+ skip = len (code ); emit (0 )
131+ emit (av [0 ])
132+ emit (av [1 ])
156133 _compile (code , av [2 ], flags )
157- append (OPCODES [SUCCESS ])
134+ emit (OPCODES [SUCCESS ])
158135 code [skip ] = len (code ) - skip
159136 else :
160- append (OPCODES [op ])
161- skip = len (code ); append (0 )
162- append (av [0 ])
163- append (av [1 ])
137+ emit (OPCODES [op ])
138+ skip = len (code ); emit (0 )
139+ emit (av [0 ])
140+ emit (av [1 ])
164141 _compile (code , av [2 ], flags )
165- append (OPCODES [SUCCESS ])
142+ emit (OPCODES [SUCCESS ])
166143 code [skip ] = len (code ) - skip
167144 elif op is SUBPATTERN :
168145 group = av [0 ]
169146 if group :
170- append (OPCODES [MARK ])
171- append ((group - 1 )* 2 )
147+ emit (OPCODES [MARK ])
148+ emit ((group - 1 )* 2 )
172149 _compile (code , av [1 ], flags )
173150 if group :
174- append (OPCODES [MARK ])
175- append ((group - 1 )* 2 + 1 )
151+ emit (OPCODES [MARK ])
152+ emit ((group - 1 )* 2 + 1 )
176153 else :
177154 raise ValueError , ("unsupported operand type" , op )
178155
def compile(p, flags=0):
    """Convert a pattern into a compiled ``_sre`` pattern object.

    ``p`` is either a pattern string (plain or unicode), which is first
    parsed with ``sre_parse.parse``, or an already parsed pattern object
    exposing ``.data`` and ``.pattern``.  ``flags`` is OR-ed with the
    flags recorded on the parsed pattern.

    Returns whatever ``_sre.compile`` returns.  The original pattern
    string is passed along when one was given, otherwise ``None``.

    Raises AssertionError if the parsed pattern reports more than 100
    groups.
    """
    # internal: convert pattern list to internal format
    if type(p) in (type(""), type(u"")):
        import sre_parse
        pattern = p
        p = sre_parse.parse(p)
    else:
        pattern = None
    flags = p.pattern.flags | flags
    code = []
    _compile(code, p.data, flags)
    code.append(OPCODES[SUCCESS])
    # FIXME: <fl> get rid of this limitation
    # Raised explicitly instead of using an ``assert`` statement so the
    # check is still enforced when Python runs with -O (which strips
    # assert statements).
    if p.pattern.groups > 100:
        raise AssertionError(
            "sorry, but this version only supports 100 named groups"
            )
    return _sre.compile(
        pattern, flags,
        # pack the opcode list into a string of WORDSIZE-typed items
        array.array(WORDSIZE, code).tostring(),
        p.pattern.groups-1, p.pattern.groupdict
        )
0 commit comments