Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c1e73c3

Browse files
committed
Make sure that the tolerant parser still parses valid HTML correctly.
1 parent b9a48f7 commit c1e73c3

1 file changed

Lines changed: 19 additions & 17 deletions

File tree

Lib/test/test_htmlparser.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,12 @@ def handle_starttag(self, tag, attrs):
7272

7373
class TestCaseBase(unittest.TestCase):
7474

75+
def get_collector(self):
76+
raise NotImplementedError
77+
7578
def _run_check(self, source, expected_events, collector=None):
7679
if collector is None:
77-
collector = EventCollector()
80+
collector = self.get_collector()
7881
parser = collector
7982
for s in source:
8083
parser.feed(s)
@@ -96,7 +99,10 @@ def parse(source=source):
9699
self.assertRaises(html.parser.HTMLParseError, parse)
97100

98101

99-
class HTMLParserTestCase(TestCaseBase):
102+
class HTMLParserStrictTestCase(TestCaseBase):
103+
104+
def get_collector(self):
105+
return EventCollector(strict=True)
100106

101107
def test_processing_instruction_only(self):
102108
self._run_check("<?processing instruction>", [
@@ -353,12 +359,11 @@ def test_cdata_content(self):
353359

354360

355361
def test_entityrefs_in_attributes(self):
356-
self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
357-
("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])
358-
])
362+
self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>",
363+
[("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])])
359364

360365

361-
class HTMLParserTolerantTestCase(TestCaseBase):
366+
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
362367

363368
def get_collector(self):
364369
return EventCollector(strict=False)
@@ -374,8 +379,7 @@ def test_tolerant_parsing(self):
374379
('endtag', 'a'),
375380
('endtag', 'html'),
376381
('data', '\n<img src="URL><//img></html'),
377-
('endtag', 'html')],
378-
collector=self.get_collector())
382+
('endtag', 'html')])
379383

380384
def test_with_unquoted_attributes(self):
381385
# see #12008
@@ -399,22 +403,19 @@ def test_with_unquoted_attributes(self):
399403
('starttag', 'span', [('class', 'en')]), ('data', ' library'),
400404
('endtag', 'span'), ('endtag', 'a'), ('endtag', 'table')
401405
]
402-
403-
self._run_check(html, expected, collector=self.get_collector())
406+
self._run_check(html, expected)
404407

405408
def test_comma_between_attributes(self):
406409
self._run_check('<form action="/xxx.php?a=1&amp;b=2&amp", '
407410
'method="post">', [
408411
('starttag', 'form',
409412
[('action', '/xxx.php?a=1&b=2&amp'),
410-
('method', 'post')])],
411-
collector=self.get_collector())
413+
('method', 'post')])])
412414

413415
def test_weird_chars_in_unquoted_attribute_values(self):
414416
self._run_check('<form action=bogus|&#()value>', [
415417
('starttag', 'form',
416-
[('action', 'bogus|&#()value')])],
417-
collector=self.get_collector())
418+
[('action', 'bogus|&#()value')])])
418419

419420
def test_correct_detection_of_start_tags(self):
420421
# see #13273
@@ -436,7 +437,7 @@ def test_correct_detection_of_start_tags(self):
436437
('endtag', 'b'),
437438
('endtag', 'div')
438439
]
439-
self._run_check(html, expected, collector=self.get_collector())
440+
self._run_check(html, expected)
440441

441442
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
442443
expected = [
@@ -447,7 +448,7 @@ def test_correct_detection_of_start_tags(self):
447448
('data', 'rain'),
448449
('endtag', 'a'),
449450
]
450-
self._run_check(html, expected, collector=self.get_collector())
451+
self._run_check(html, expected)
451452

452453
def test_unescape_function(self):
453454
p = html.parser.HTMLParser()
@@ -456,8 +457,9 @@ def test_unescape_function(self):
456457
# see #12888
457458
self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050)
458459

460+
459461
def test_main():
460-
support.run_unittest(HTMLParserTestCase, HTMLParserTolerantTestCase)
462+
support.run_unittest(HTMLParserStrictTestCase, HTMLParserTolerantTestCase)
461463

462464

463465
if __name__ == "__main__":

0 commit comments

Comments
 (0)