1414# other compatibility work.
1515#
1616
17- # FIXME: <fl> formalize (objectify?) and document the compiler code
18- # format, so that other frontends can use this compiler
19-
2017import array , string , sys
2118
2219import _sre
@@ -45,64 +42,70 @@ def append(self, code):
4542 self .data .append (code )
4643 def todata (self ):
4744 # print self.data
48- return array . array ( WORDSIZE , self . data ). tostring ()
49-
50- def _lower ( literal ) :
51- # return _sre._lower(literal) # FIXME
52- return string . lower ( literal )
45+ try :
46+ return array . array ( WORDSIZE , self . data ). tostring ()
47+ except OverflowError :
48+ print self . data
49+ raise
5350
54- def _compile (code , pattern , flags ):
51+ def _compile (code , pattern , flags , level = 0 ):
5552 append = code .append
5653 for op , av in pattern :
5754 if op is ANY :
58- if "s" in flags :
59- append (CODES [op ]) # any character at all!
55+ if flags & SRE_FLAG_DOTALL :
56+ append (OPCODES [op ]) # any character at all!
6057 else :
61- append (CODES [ NOT_LITERAL ])
62- append (10 )
58+ append (OPCODES [ CATEGORY ])
59+ append (CHCODES [ CATEGORY_NOT_LINEBREAK ] )
6360 elif op in (SUCCESS , FAILURE ):
64- append (CODES [op ])
61+ append (OPCODES [op ])
6562 elif op is AT :
66- append (CODES [op ])
67- append (POSITIONS [av ])
63+ append (OPCODES [op ])
64+ if flags & SRE_FLAG_MULTILINE :
65+ append (ATCODES [AT_MULTILINE [av ]])
66+ else :
67+ append (ATCODES [av ])
6868 elif op is BRANCH :
69- append (CODES [op ])
69+ append (OPCODES [op ])
7070 tail = []
7171 for av in av [1 ]:
7272 skip = len (code ); append (0 )
73- _compile (code , av , flags )
74- append (CODES [JUMP ])
73+ _compile (code , av , flags , level )
74+ append (OPCODES [JUMP ])
7575 tail .append (len (code )); append (0 )
7676 code [skip ] = len (code ) - skip
7777 append (0 ) # end of branch
7878 for tail in tail :
7979 code [tail ] = len (code ) - tail
8080 elif op is CALL :
81- append (CODES [op ])
81+ append (OPCODES [op ])
8282 skip = len (code ); append (0 )
83- _compile (code , av , flags )
84- append (CODES [SUCCESS ])
83+ _compile (code , av , flags , level + 1 )
84+ append (OPCODES [SUCCESS ])
8585 code [skip ] = len (code ) - skip
8686 elif op is CATEGORY : # not used by current parser
87- append (CODES [op ])
88- append (CATEGORIES [av ])
87+ append (OPCODES [op ])
88+ if flags & SRE_FLAG_LOCALE :
89+ append (CH_LOCALE [CHCODES [av ]])
90+ else :
91+ append (CHCODES [av ])
8992 elif op is GROUP :
90- if "i" in flags :
91- append (CODES [ MAP_IGNORE [op ]])
93+ if flags & SRE_FLAG_IGNORECASE :
94+ append (OPCODES [ OP_IGNORE [op ]])
9295 else :
93- append (CODES [op ])
94- append (av )
96+ append (OPCODES [op ])
97+ append (av - 1 )
9598 elif op is IN :
96- if "i" in flags :
97- append (CODES [ MAP_IGNORE [op ]])
99+ if flags & SRE_FLAG_IGNORECASE :
100+ append (OPCODES [ OP_IGNORE [op ]])
98101 def fixup (literal ):
99- return ord (_lower ( literal ))
102+ return ord (literal . lower ( ))
100103 else :
101- append (CODES [op ])
104+ append (OPCODES [op ])
102105 fixup = ord
103106 skip = len (code ); append (0 )
104107 for op , av in av :
105- append (CODES [op ])
108+ append (OPCODES [op ])
106109 if op is NEGATE :
107110 pass
108111 elif op is LITERAL :
@@ -111,77 +114,80 @@ def fixup(literal):
111114 append (fixup (av [0 ]))
112115 append (fixup (av [1 ]))
113116 elif op is CATEGORY :
114- append (CATEGORIES [av ])
117+ if flags & SRE_FLAG_LOCALE :
118+ append (CH_LOCALE [CHCODES [av ]])
119+ else :
120+ append (CHCODES [av ])
115121 else :
116122 raise ValueError , "unsupported set operator"
117- append (CODES [FAILURE ])
123+ append (OPCODES [FAILURE ])
118124 code [skip ] = len (code ) - skip
119125 elif op in (LITERAL , NOT_LITERAL ):
120- if "i" in flags :
121- append (CODES [ MAP_IGNORE [op ]])
122- append (ord (_lower ( av )))
126+ if flags & SRE_FLAG_IGNORECASE :
127+ append (OPCODES [ OP_IGNORE [op ]])
128+ append (ord (av . lower ( )))
123129 else :
124- append (CODES [op ])
130+ append (OPCODES [op ])
125131 append (ord (av ))
126132 elif op is MARK :
127- append (CODES [op ])
133+ append (OPCODES [op ])
128134 append (av )
129135 elif op in (REPEAT , MIN_REPEAT , MAX_REPEAT ):
130136 lo , hi = av [2 ].getwidth ()
131137 if lo == 0 :
132138 raise SyntaxError , "cannot repeat zero-width items"
133139 if lo == hi == 1 and op is MAX_REPEAT :
134- append (CODES [MAX_REPEAT_ONE ])
140+ append (OPCODES [MAX_REPEAT_ONE ])
135141 skip = len (code ); append (0 )
136142 append (av [0 ])
137143 append (av [1 ])
138- _compile (code , av [2 ], flags )
139- append (CODES [SUCCESS ])
144+ _compile (code , av [2 ], flags , level + 1 )
145+ append (OPCODES [SUCCESS ])
140146 code [skip ] = len (code ) - skip
141147 else :
142- append (CODES [op ])
148+ append (OPCODES [op ])
143149 skip = len (code ); append (0 )
144150 append (av [0 ])
145151 append (av [1 ])
146- _compile (code , av [2 ], flags )
152+ _compile (code , av [2 ], flags , level + 1 )
147153 if op is MIN_REPEAT :
148- append (CODES [MIN_UNTIL ])
154+ append (OPCODES [MIN_UNTIL ])
149155 else :
150- # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
151- append (CODES [MAX_UNTIL ])
156+ append (OPCODES [MAX_UNTIL ])
152157 code [skip ] = len (code ) - skip
153158 elif op is SUBPATTERN :
154- ## group = av[0]
155- ## if group:
156- ## append(CODES [MARK])
157- ## append((group-1)*2)
158- _compile (code , av [1 ], flags )
159- ## if group:
160- ## append(CODES [MARK])
161- ## append((group-1)*2+1)
159+ group = av [0 ]
160+ if group :
161+ append (OPCODES [MARK ])
162+ append ((group - 1 )* 2 )
163+ _compile (code , av [1 ], flags , level + 1 )
164+ if group :
165+ append (OPCODES [MARK ])
166+ append ((group - 1 )* 2 + 1 )
162167 else :
163168 raise ValueError , ("unsupported operand type" , op )
164169
165- def compile (p , flags = () ):
170+ def compile (p , flags = 0 ):
166171 # convert pattern list to internal format
167172 if type (p ) in (type ("" ), type (u"" )):
168173 import sre_parse
169174 pattern = p
170175 p = sre_parse .parse (p )
171176 else :
172177 pattern = None
173- # print p.getwidth()
174- # print p
178+ flags = p .pattern .flags | flags
175179 code = Code ()
176- _compile (code , p .data , p .pattern .flags )
177- code .append (CODES [SUCCESS ])
178- # print list(code.data)
180+ _compile (code , p .data , flags )
181+ code .append (OPCODES [SUCCESS ])
179182 data = code .todata ()
180183 if 0 : # debugging
181184 print
182185 print "-" * 68
183186 import sre_disasm
184187 sre_disasm .disasm (data )
185188 print "-" * 68
186- # print len(data), p.pattern.groups, len(p.pattern.groupdict)
187- return _sre .compile (pattern , data , p .pattern .groups - 1 , p .pattern .groupdict )
189+ return _sre .compile (
190+ pattern , flags ,
191+ data ,
192+ p .pattern .groups - 1 , p .pattern .groupdict
193+ )
0 commit comments