@@ -196,60 +196,6 @@ def test_bare_pointy_brackets(self):
196196 ("data" , "this < text > contains < bare>pointy< brackets" ),
197197 ])
198198
199- def test_attr_syntax (self ):
200- output = [
201- ("starttag" , "a" , [("b" , "v" ), ("c" , "v" ), ("d" , "v" ), ("e" , None )])
202- ]
203- self ._run_check ("""<a b='v' c="v" d=v e>""" , output )
204- self ._run_check ("""<a b = 'v' c = "v" d = v e>""" , output )
205- self ._run_check ("""<a\n b\n =\n 'v'\n c\n =\n "v"\n d\n =\n v\n e>""" , output )
206- self ._run_check ("""<a\t b\t =\t 'v'\t c\t =\t "v"\t d\t =\t v\t e>""" , output )
207-
208- def test_attr_values (self ):
209- self ._run_check ("""<a b='xxx\n \t xxx' c="yyy\t \n yyy" d='\t xyz\n '>""" ,
210- [("starttag" , "a" , [("b" , "xxx\n \t xxx" ),
211- ("c" , "yyy\t \n yyy" ),
212- ("d" , "\t xyz\n " )])
213- ])
214- self ._run_check ("""<a b='' c="">""" , [
215- ("starttag" , "a" , [("b" , "" ), ("c" , "" )]),
216- ])
217- # Regression test for SF patch #669683.
218- self ._run_check ("<e a=rgb(1,2,3)>" , [
219- ("starttag" , "e" , [("a" , "rgb(1,2,3)" )]),
220- ])
221- # Regression test for SF bug #921657.
222- self ._run_check ("<a href=mailto:xyz@example.com>" , [
223- ("starttag" , "a" , [("href" , "mailto:xyz@example.com" )]),
224- ])
225-
226- def test_attr_nonascii (self ):
227- # see issue 7311
228- self ._run_check ("<img src=/foo/bar.png alt=\u4e2d \u6587 >" , [
229- ("starttag" , "img" , [("src" , "/foo/bar.png" ),
230- ("alt" , "\u4e2d \u6587 " )]),
231- ])
232- self ._run_check ("<a title='\u30c6 \u30b9 \u30c8 ' "
233- "href='\u30c6 \u30b9 \u30c8 .html'>" , [
234- ("starttag" , "a" , [("title" , "\u30c6 \u30b9 \u30c8 " ),
235- ("href" , "\u30c6 \u30b9 \u30c8 .html" )]),
236- ])
237- self ._run_check ('<a title="\u30c6 \u30b9 \u30c8 " '
238- 'href="\u30c6 \u30b9 \u30c8 .html">' , [
239- ("starttag" , "a" , [("title" , "\u30c6 \u30b9 \u30c8 " ),
240- ("href" , "\u30c6 \u30b9 \u30c8 .html" )]),
241- ])
242-
243- def test_attr_entity_replacement (self ):
244- self ._run_check ("""<a b='&amp;&gt;&lt;&quot;&apos;'>""" , [
245- ("starttag" , "a" , [("b" , "&><\" '" )]),
246- ])
247-
248- def test_attr_funky_names (self ):
249- self ._run_check ("""<a a.b='v' c:d=v e-f=v>""" , [
250- ("starttag" , "a" , [("a.b" , "v" ), ("c:d" , "v" ), ("e-f" , "v" )]),
251- ])
252-
253199 def test_illegal_declarations (self ):
254200 self ._parse_error ('<!spacer type="block" height="25">' )
255201
@@ -295,13 +241,11 @@ def test_starttag_junk_chars(self):
295241 self ._parse_error ("<a<a>" )
296242 self ._parse_error ("</a<a>" )
297243 self ._parse_error ("<!" )
298- self ._parse_error ("<a $>" )
299244 self ._parse_error ("<a" )
300245 self ._parse_error ("<a foo='bar'" )
301246 self ._parse_error ("<a foo='bar" )
302247 self ._parse_error ("<a foo='>'" )
303248 self ._parse_error ("<a foo='>" )
304- self ._parse_error ("<a foo=>" )
305249
306250 def test_declaration_junk_chars (self ):
307251 self ._parse_error ("<!DOCTYPE foo $ >" )
@@ -358,10 +302,6 @@ def test_cdata_content(self):
358302 ("endtag" , element_lower )])
359303
360304
361- def test_entityrefs_in_attributes (self ):
362- self ._run_check ("<html foo='&euro;&amp;aa&amp;unsupported;'>" ,
363- [("starttag" , "html" , [("foo" , "\u20AC &aa&unsupported;" )])])
364-
365305
366306class HTMLParserTolerantTestCase (HTMLParserStrictTestCase ):
367307
@@ -371,15 +311,14 @@ def get_collector(self):
371311 def test_tolerant_parsing (self ):
372312 self ._run_check ('<html <html>te>>xt&a<<bc</a></html>\n '
373313 '<img src="URL><//img></html</html>' , [
374- ('data' , '<html ' ),
375- ('starttag' , 'html' , []),
376- ('data' , 'te>>xt' ),
377- ('entityref' , 'a' ),
378- ('data' , '<<bc' ),
379- ('endtag' , 'a' ),
380- ('endtag' , 'html' ),
381- ('data' , '\n <img src="URL><//img></html' ),
382- ('endtag' , 'html' )])
314+ ('starttag' , 'html' , [('<html' , None )]),
315+ ('data' , 'te>>xt' ),
316+ ('entityref' , 'a' ),
317+ ('data' , '<<bc' ),
318+ ('endtag' , 'a' ),
319+ ('endtag' , 'html' ),
320+ ('data' , '\n <img src="URL><//img></html' ),
321+ ('endtag' , 'html' )])
383322
384323 def test_with_unquoted_attributes (self ):
385324 # see #12008
@@ -410,7 +349,7 @@ def test_comma_between_attributes(self):
410349 'method="post">' , [
411350 ('starttag' , 'form' ,
412351 [('action' , '/xxx.php?a=1&b=2&' ),
413- ('method' , 'post' )])])
352+ (',' , None ), ( ' method' , 'post' )])])
414353
415354 def test_weird_chars_in_unquoted_attribute_values (self ):
416355 self ._run_check ('<form action=bogus|&#()value>' , [
@@ -441,7 +380,7 @@ def test_correct_detection_of_start_tags(self):
441380
442381 html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
443382 expected = [
444- ('starttag' , 'div' , [('style' , '' ), ('foo' , 'bar' )]),
383+ ('starttag' , 'div' , [('style' , '' ), (',' , None ), ( ' foo' , 'bar' )]),
445384 ('starttag' , 'b' , []),
446385 ('data' , 'The ' ),
447386 ('starttag' , 'a' , [('href' , 'some_url' )]),
@@ -458,8 +397,151 @@ def test_unescape_function(self):
458397 self .assertEqual (p .unescape ('{ ' * 1050 ), '{ ' * 1050 )
459398
460399
400+ class AttributesStrictTestCase (TestCaseBase ):
401+
402+ def get_collector (self ):
403+ return EventCollector (strict = True )
404+
405+ def test_attr_syntax (self ):
406+ output = [
407+ ("starttag" , "a" , [("b" , "v" ), ("c" , "v" ), ("d" , "v" ), ("e" , None )])
408+ ]
409+ self ._run_check ("""<a b='v' c="v" d=v e>""" , output )
410+ self ._run_check ("""<a b = 'v' c = "v" d = v e>""" , output )
411+ self ._run_check ("""<a\n b\n =\n 'v'\n c\n =\n "v"\n d\n =\n v\n e>""" , output )
412+ self ._run_check ("""<a\t b\t =\t 'v'\t c\t =\t "v"\t d\t =\t v\t e>""" , output )
413+
414+ def test_attr_values (self ):
415+ self ._run_check ("""<a b='xxx\n \t xxx' c="yyy\t \n yyy" d='\t xyz\n '>""" ,
416+ [("starttag" , "a" , [("b" , "xxx\n \t xxx" ),
417+ ("c" , "yyy\t \n yyy" ),
418+ ("d" , "\t xyz\n " )])])
419+ self ._run_check ("""<a b='' c="">""" ,
420+ [("starttag" , "a" , [("b" , "" ), ("c" , "" )])])
421+ # Regression test for SF patch #669683.
422+ self ._run_check ("<e a=rgb(1,2,3)>" ,
423+ [("starttag" , "e" , [("a" , "rgb(1,2,3)" )])])
424+ # Regression test for SF bug #921657.
425+ self ._run_check (
426+ "<a href=mailto:xyz@example.com>" ,
427+ [("starttag" , "a" , [("href" , "mailto:xyz@example.com" )])])
428+
429+ def test_attr_nonascii (self ):
430+ # see issue 7311
431+ self ._run_check (
432+ "<img src=/foo/bar.png alt=\u4e2d \u6587 >" ,
433+ [("starttag" , "img" , [("src" , "/foo/bar.png" ),
434+ ("alt" , "\u4e2d \u6587 " )])])
435+ self ._run_check (
436+ "<a title='\u30c6 \u30b9 \u30c8 ' href='\u30c6 \u30b9 \u30c8 .html'>" ,
437+ [("starttag" , "a" , [("title" , "\u30c6 \u30b9 \u30c8 " ),
438+ ("href" , "\u30c6 \u30b9 \u30c8 .html" )])])
439+ self ._run_check (
440+ '<a title="\u30c6 \u30b9 \u30c8 " href="\u30c6 \u30b9 \u30c8 .html">' ,
441+ [("starttag" , "a" , [("title" , "\u30c6 \u30b9 \u30c8 " ),
442+ ("href" , "\u30c6 \u30b9 \u30c8 .html" )])])
443+
444+ def test_attr_entity_replacement (self ):
445+ self ._run_check (
446+ "<a b='&amp;&gt;&lt;&quot;&apos;'>" ,
447+ [("starttag" , "a" , [("b" , "&><\" '" )])])
448+
449+ def test_attr_funky_names (self ):
450+ self ._run_check (
451+ "<a a.b='v' c:d=v e-f=v>" ,
452+ [("starttag" , "a" , [("a.b" , "v" ), ("c:d" , "v" ), ("e-f" , "v" )])])
453+
454+ def test_entityrefs_in_attributes (self ):
455+ self ._run_check (
456+ "<html foo='&euro;&amp;aa&amp;unsupported;'>" ,
457+ [("starttag" , "html" , [("foo" , "\u20AC &aa&unsupported;" )])])
458+
459+
460+
461+ class AttributesTolerantTestCase (AttributesStrictTestCase ):
462+
463+ def get_collector (self ):
464+ return EventCollector (strict = False )
465+
466+ def test_attr_funky_names2 (self ):
467+ self ._run_check (
468+ "<a $><b $=%><c \=/>" ,
469+ [("starttag" , "a" , [("$" , None )]),
470+ ("starttag" , "b" , [("$" , "%" )]),
471+ ("starttag" , "c" , [("\\ " , "/" )])])
472+
473+ def test_entities_in_attribute_value (self ):
474+ # see #1200313
475+ for entity in ['&' , '&amp;' , '&#38;' , '&#x26;' ]:
476+ self ._run_check ('<a href="%s">' % entity ,
477+ [("starttag" , "a" , [("href" , "&" )])])
478+ self ._run_check ("<a href='%s'>" % entity ,
479+ [("starttag" , "a" , [("href" , "&" )])])
480+ self ._run_check ("<a href=%s>" % entity ,
481+ [("starttag" , "a" , [("href" , "&" )])])
482+
483+ def test_malformed_attributes (self ):
484+ # see #13357
485+ html = (
486+ "<a href=test'style='color:red;bad1'>test - bad1</a>"
487+ "<a href=test'+style='color:red;ba2'>test - bad2</a>"
488+ "<a href=test'&nbsp;style='color:red;bad3'>test - bad3</a>"
489+ "<a href = test'&nbsp;style='color:red;bad4' >test - bad4</a>"
490+ )
491+ expected = [
492+ ('starttag' , 'a' , [('href' , "test'style='color:red;bad1'" )]),
493+ ('data' , 'test - bad1' ), ('endtag' , 'a' ),
494+ ('starttag' , 'a' , [('href' , "test'+style='color:red;ba2'" )]),
495+ ('data' , 'test - bad2' ), ('endtag' , 'a' ),
496+ ('starttag' , 'a' , [('href' , "test'\xa0 style='color:red;bad3'" )]),
497+ ('data' , 'test - bad3' ), ('endtag' , 'a' ),
498+ ('starttag' , 'a' , [('href' , "test'\xa0 style='color:red;bad4'" )]),
499+ ('data' , 'test - bad4' ), ('endtag' , 'a' )
500+ ]
501+ self ._run_check (html , expected )
502+
503+ def test_malformed_adjacent_attributes (self ):
504+ # see #12629
505+ self ._run_check ('<x><y z=""o"" /></x>' ,
506+ [('starttag' , 'x' , []),
507+ ('startendtag' , 'y' , [('z' , '' ), ('o""' , None )]),
508+ ('endtag' , 'x' )])
509+ self ._run_check ('<x><y z="""" /></x>' ,
510+ [('starttag' , 'x' , []),
511+ ('startendtag' , 'y' , [('z' , '' ), ('""' , None )]),
512+ ('endtag' , 'x' )])
513+
514+ # see #755670 for the following 3 tests
515+ def test_adjacent_attributes (self ):
516+ self ._run_check ('<a width="100%"cellspacing=0>' ,
517+ [("starttag" , "a" ,
518+ [("width" , "100%" ), ("cellspacing" ,"0" )])])
519+
520+ self ._run_check ('<a id="foo"class="bar">' ,
521+ [("starttag" , "a" ,
522+ [("id" , "foo" ), ("class" ,"bar" )])])
523+
524+ def test_missing_attribute_value (self ):
525+ self ._run_check ('<a v=>' ,
526+ [("starttag" , "a" , [("v" , "" )])])
527+
528+ def test_javascript_attribute_value (self ):
529+ self ._run_check ("<a href=javascript:popup('/popup/help.html')>" ,
530+ [("starttag" , "a" ,
531+ [("href" , "javascript:popup('/popup/help.html')" )])])
532+
533+ def test_end_tag_in_attribute_value (self ):
534+ # see #1745761
535+ self ._run_check ("<a href='http://www.example.org/\" >;'>spam</a>" ,
536+ [("starttag" , "a" ,
537+ [("href" , "http://www.example.org/\" >;" )]),
538+ ("data" , "spam" ), ("endtag" , "a" )])
539+
540+
541+
461542def test_main ():
462- support .run_unittest (HTMLParserStrictTestCase , HTMLParserTolerantTestCase )
543+ support .run_unittest (HTMLParserStrictTestCase , HTMLParserTolerantTestCase ,
544+ AttributesStrictTestCase , AttributesTolerantTestCase )
463545
464546
465547if __name__ == "__main__" :
0 commit comments