# Find an array type code whose item size matches the engine's code size.
# WORDSIZE is left bound to the first matching type code and is used when
# packing compiled code words into a byte string.
for WORDSIZE in "BHil":
    # itemsize equals len() of a one-element array's byte string, and avoids
    # array.tostring(), which no longer exists on Python 3.9+.
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    # no candidate matched: the loop finished without hitting break
    raise RuntimeError("cannot find a useable array type")
3232
# FIXME: <fl> should move some optimizations from the parser to here!
3434
class Code:
    """Growable sequence of integer code words.

    Supports ``len()``, item read/write by index and ``append()``; all of
    them delegate to the underlying ``data`` list.  ``todata()`` packs the
    words into a byte string using the module-level ``WORDSIZE`` type code.
    """

    def __init__(self):
        # code words in the order they were appended
        self.data = []

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, code):
        # used to back-patch a placeholder word once its value is known
        self.data[index] = code

    def append(self, code):
        self.data.append(code)

    def todata(self):
        """Return the code words packed as ``array.array(WORDSIZE, ...)`` bytes."""
        packed = array.array(WORDSIZE, self.data)
        # array.tostring() was removed in Python 3.9; tobytes() is the same
        # operation under its current name.  Fall back for old interpreters.
        if hasattr(packed, "tobytes"):
            return packed.tobytes()
        return packed.tostring()
4949
5050def _lower (literal ):
5151 # return _sre._lower(literal) # FIXME
@@ -54,122 +54,122 @@ def _lower(literal):
def _compile(code, pattern, flags):
    """Append the code words for each ``(op, av)`` item of *pattern* to *code*.

    Parameters:
        code: target buffer; must support ``append()``, ``len()`` and item
            assignment (placeholders are written first and patched later).
        pattern: iterable of ``(op, av)`` pairs.
        flags: container tested with ``in``; ``"s"`` makes ANY emit its own
            opcode instead of ``NOT_LITERAL 10``, ``"i"`` selects the
            ``MAP_IGNORE`` opcode variants and lower-cases literals.

    Raises:
        ValueError: unsupported operator inside a set, or unsupported ``op``.
        SyntaxError: a repeat whose item has a minimum width of zero.
    """
    # NOTE(review): block nesting below was reconstructed from a rendering
    # that had lost its indentation; it follows the data flow (every ``skip``
    # placeholder is patched after the code it spans) -- confirm against the
    # upstream file.
    append = code.append
    for op, av in pattern:
        if op is ANY:
            if "s" in flags:
                append(CODES[op])  # any character at all!
            else:
                # anything except character code 10
                append(CODES[NOT_LITERAL])
                append(10)
        elif op in (SUCCESS, FAILURE):
            append(CODES[op])
        elif op is AT:
            append(CODES[op])
            append(POSITIONS[av])
        elif op is BRANCH:
            append(CODES[op])
            jump_slots = []
            for alternative in av[1]:
                # placeholder: distance to the next alternative
                skip = len(code)
                append(0)
                _compile(code, alternative, flags)
                append(CODES[JUMP])
                # placeholder: distance from this JUMP to the end of the branch
                jump_slots.append(len(code))
                append(0)
                code[skip] = len(code) - skip
            append(0)  # end of branch
            for slot in jump_slots:
                code[slot] = len(code) - slot
        elif op is CALL:
            append(CODES[op])
            skip = len(code)
            append(0)
            _compile(code, av, flags)
            append(CODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY:  # not used by current parser
            append(CODES[op])
            append(CATEGORIES[av])
        elif op is GROUP:
            if "i" in flags:
                append(CODES[MAP_IGNORE[op]])
            else:
                append(CODES[op])
            append(av)
        elif op is IN:
            if "i" in flags:
                append(CODES[MAP_IGNORE[op]])

                def fixup(literal):
                    return ord(_lower(literal))
            else:
                append(CODES[op])
                fixup = ord
            skip = len(code)
            append(0)
            # distinct names: the original reused ``op``/``av`` for the set
            # members, shadowing the outer loop variables
            for set_op, set_av in av:
                append(CODES[set_op])
                if set_op is NEGATE:
                    pass
                elif set_op is LITERAL:
                    append(fixup(set_av))
                elif set_op is RANGE:
                    append(fixup(set_av[0]))
                    append(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    append(CATEGORIES[set_av])
                else:
                    raise ValueError("unsupported set operator")
            append(CODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            if "i" in flags:
                append(CODES[MAP_IGNORE[op]])
                append(ord(_lower(av)))
            else:
                append(CODES[op])
                append(ord(av))
        elif op is MARK:
            append(CODES[op])
            append(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            lo, hi = av[2].getwidth()
            if lo == 0:
                raise SyntaxError("cannot repeat zero-width items")
            if lo == hi == 1 and op is MAX_REPEAT:
                # item is exactly one wide: use the dedicated opcode
                opener, closer = MAX_REPEAT_ONE, SUCCESS
            elif op is MIN_REPEAT:
                opener, closer = op, MIN_UNTIL
            else:
                # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
                opener, closer = op, MAX_UNTIL
            append(CODES[opener])
            skip = len(code)
            append(0)
            append(av[0])
            append(av[1])
            _compile(code, av[2], flags)
            append(CODES[closer])
            code[skip] = len(code) - skip
        elif op is SUBPATTERN:
            # Only the group's body is compiled; the MARK opcodes that would
            # bracket it were already commented out in the original.
            _compile(code, av[1], flags)
        else:
            raise ValueError("unsupported operand type", op)
164164
165165def compile (p , flags = ()):
166166 # convert pattern list to internal format
167- if type (p ) is type ("" ):
168- import sre_parse
169- pattern = p
170- p = sre_parse .parse (p )
167+ if type (p ) in ( type ("" ), type ( u"" ) ):
168+ import sre_parse
169+ pattern = p
170+ p = sre_parse .parse (p )
171171 else :
172- pattern = None
172+ pattern = None
173173 # print p.getwidth()
174174 # print p
175175 code = Code ()
@@ -178,10 +178,10 @@ def compile(p, flags=()):
178178 # print list(code.data)
179179 data = code .todata ()
180180 if 0 : # debugging
181- print
182- print "-" * 68
183- import sre_disasm
184- sre_disasm .disasm (data )
185- print "-" * 68
181+ print
182+ print "-" * 68
183+ import sre_disasm
184+ sre_disasm .disasm (data )
185+ print "-" * 68
186186 # print len(data), p.pattern.groups, len(p.pattern.groupdict)
187187 return _sre .compile (pattern , data , p .pattern .groups - 1 , p .pattern .groupdict )
0 commit comments