Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f6ca26f

Browse files
committed
#17802: merge with 3.3.
2 parents 8a42d60 + 8e596a7 commit f6ca26f

3 files changed

Lines changed: 18 additions & 0 deletions

File tree

Lib/html/parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ def goahead(self, end):
251251
if self.strict:
252252
self.error("EOF in middle of entity or char ref")
253253
else:
254+
k = match.end()
254255
if k <= i:
255256
k = n
256257
i = self.updatepos(i, i + 1)

Lib/test/test_htmlparser.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,20 @@ def test_correct_detection_of_start_tags(self):
535535
]
536536
self._run_check(html, expected)
537537

538+
def test_EOF_in_charref(self):
539+
# see #17802
540+
# This test checks that the UnboundLocalError reported in the issue
541+
# is not raised; however, I'm not sure the returned values are correct.
542+
# Maybe HTMLParser should use self.unescape for these
543+
data = [
544+
('a&', [('data', 'a&')]),
545+
('a&b', [('data', 'ab')]),
546+
('a&b ', [('data', 'a'), ('entityref', 'b'), ('data', ' ')]),
547+
('a&b;', [('data', 'a'), ('entityref', 'b')]),
548+
]
549+
for html, expected in data:
550+
self._run_check(html, expected)
551+
538552
def test_unescape_function(self):
539553
p = self.get_collector()
540554
self.assertEqual(p.unescape('&#bad;'),'&#bad;')

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ Library
6262

6363
- Issue #14679: add an __all__ (that contains only HTMLParser) to html.parser.
6464

65+
- Issue #17802: Fix an UnboundLocalError in html.parser. Initial tests by
66+
Thomas Barlow.
67+
6568
- Issue #17358: Modules loaded by imp.load_source() and load_compiled() (and by
6669
extension load_module()) now have a better chance of working when reloaded.
6770

0 commit comments

Comments
 (0)