@@ -72,9 +72,12 @@ def handle_starttag(self, tag, attrs):
7272
7373class TestCaseBase (unittest .TestCase ):
7474
75+ def get_collector (self ):
76+ raise NotImplementedError
77+
7578 def _run_check (self , source , expected_events , collector = None ):
7679 if collector is None :
77- collector = EventCollector ()
80+ collector = self . get_collector ()
7881 parser = collector
7982 for s in source :
8083 parser .feed (s )
@@ -96,7 +99,10 @@ def parse(source=source):
9699 self .assertRaises (html .parser .HTMLParseError , parse )
97100
98101
99- class HTMLParserTestCase (TestCaseBase ):
102+ class HTMLParserStrictTestCase (TestCaseBase ):
103+
104+ def get_collector (self ):
105+ return EventCollector (strict = True )
100106
101107 def test_processing_instruction_only (self ):
102108 self ._run_check ("<?processing instruction>" , [
@@ -353,12 +359,11 @@ def test_cdata_content(self):
353359
354360
355361 def test_entityrefs_in_attributes (self ):
356- self ._run_check ("<html foo='€&aa&unsupported;'>" , [
357- ("starttag" , "html" , [("foo" , "\u20AC &aa&unsupported;" )])
358- ])
362+ self ._run_check ("<html foo='€&aa&unsupported;'>" ,
363+ [("starttag" , "html" , [("foo" , "\u20AC &aa&unsupported;" )])])
359364
360365
361- class HTMLParserTolerantTestCase (TestCaseBase ):
366+ class HTMLParserTolerantTestCase (HTMLParserStrictTestCase ):
362367
363368 def get_collector (self ):
364369 return EventCollector (strict = False )
@@ -374,8 +379,7 @@ def test_tolerant_parsing(self):
374379 ('endtag' , 'a' ),
375380 ('endtag' , 'html' ),
376381 ('data' , '\n <img src="URL><//img></html' ),
377- ('endtag' , 'html' )],
378- collector = self .get_collector ())
382+ ('endtag' , 'html' )])
379383
380384 def test_with_unquoted_attributes (self ):
381385 # see #12008
@@ -399,22 +403,19 @@ def test_with_unquoted_attributes(self):
399403 ('starttag' , 'span' , [('class' , 'en' )]), ('data' , ' library' ),
400404 ('endtag' , 'span' ), ('endtag' , 'a' ), ('endtag' , 'table' )
401405 ]
402-
403- self ._run_check (html , expected , collector = self .get_collector ())
406+ self ._run_check (html , expected )
404407
405408 def test_comma_between_attributes (self ):
406409 self ._run_check ('<form action="/xxx.php?a=1&b=2&", '
407410 'method="post">' , [
408411 ('starttag' , 'form' ,
409412 [('action' , '/xxx.php?a=1&b=2&' ),
410- ('method' , 'post' )])],
411- collector = self .get_collector ())
413+ ('method' , 'post' )])])
412414
413415 def test_weird_chars_in_unquoted_attribute_values (self ):
414416 self ._run_check ('<form action=bogus|&#()value>' , [
415417 ('starttag' , 'form' ,
416- [('action' , 'bogus|&#()value' )])],
417- collector = self .get_collector ())
418+ [('action' , 'bogus|&#()value' )])])
418419
419420 def test_correct_detection_of_start_tags (self ):
420421 # see #13273
@@ -436,7 +437,7 @@ def test_correct_detection_of_start_tags(self):
436437 ('endtag' , 'b' ),
437438 ('endtag' , 'div' )
438439 ]
439- self ._run_check (html , expected , collector = self . get_collector () )
440+ self ._run_check (html , expected )
440441
441442 html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
442443 expected = [
@@ -447,7 +448,7 @@ def test_correct_detection_of_start_tags(self):
447448 ('data' , 'rain' ),
448449 ('endtag' , 'a' ),
449450 ]
450- self ._run_check (html , expected , collector = self . get_collector () )
451+ self ._run_check (html , expected )
451452
452453 def test_unescape_function (self ):
453454 p = html .parser .HTMLParser ()
@@ -456,8 +457,9 @@ def test_unescape_function(self):
456457 # see #12888
457458 self .assertEqual (p .unescape ('{ ' * 1050 ), '{ ' * 1050 )
458459
460+
459461def test_main ():
460- support .run_unittest (HTMLParserTestCase , HTMLParserTolerantTestCase )
462+ support .run_unittest (HTMLParserStrictTestCase , HTMLParserTolerantTestCase )
461463
462464
463465if __name__ == "__main__" :
0 commit comments