6262 \s* # trailing whitespace
6363""" , re .VERBOSE )
6464endendtag = re .compile ('>' )
65+ # The HTML 5 spec (section 8.1.2.2) does not allow whitespace between "</" and
66+ # the tag name, so the leading \s* in the endtagfind pattern below may need removing.
6567endtagfind = re .compile ('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>' )
6668
6769
@@ -121,6 +123,7 @@ def reset(self):
121123 self .rawdata = ''
122124 self .lasttag = '???'
123125 self .interesting = interesting_normal
126+ self .cdata_elem = None
124127 _markupbase .ParserBase .reset (self )
125128
126129 def feed (self , data ):
@@ -145,11 +148,13 @@ def get_starttag_text(self):
145148 """Return full source of start tag: '<...>'."""
146149 return self .__starttag_text
147150
148- def set_cdata_mode (self ):
151+ def set_cdata_mode (self , elem ):
149152 self .interesting = interesting_cdata
153+ self .cdata_elem = elem .lower ()
150154
151155 def clear_cdata_mode (self ):
152156 self .interesting = interesting_normal
157+ self .cdata_elem = None
153158
154159 # Internal -- handle data as far as reasonable. May leave state
155160 # and data to be processed by a subsequent call. If 'end' is
@@ -314,7 +319,7 @@ def parse_starttag(self, i):
314319 else :
315320 self .handle_starttag (tag , attrs )
316321 if tag in self .CDATA_CONTENT_ELEMENTS :
317- self .set_cdata_mode ()
322+ self .set_cdata_mode (tag )
318323 return endpos
319324
320325 # Internal -- check to see if we have a complete starttag; return end
@@ -371,6 +376,9 @@ def parse_endtag(self, i):
371376 j = match .end ()
372377 match = endtagfind .match (rawdata , i ) # </ + tag + >
373378 if not match :
379+ if self .cdata_elem is not None :
380+ self .handle_data (rawdata [i :j ])
381+ return j
374382 if self .strict :
375383 self .error ("bad end tag: %r" % (rawdata [i :j ],))
376384 k = rawdata .find ('<' , i + 1 , j )
@@ -380,8 +388,14 @@ def parse_endtag(self, i):
380388 j = i + 1
381389 self .handle_data (rawdata [i :j ])
382390 return j
383- tag = match .group (1 )
384- self .handle_endtag (tag .lower ())
391+
392+ elem = match .group (1 ).lower () # script or style
393+ if self .cdata_elem is not None :
394+ if elem != self .cdata_elem :
395+ self .handle_data (rawdata [i :j ])
396+ return j
397+
398+ self .handle_endtag (elem .lower ())
385399 self .clear_cdata_mode ()
386400 return j
387401
0 commit comments