diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 58f6bb3b1e932d..94f4aaecfc61b3 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -25,6 +25,7 @@
charref = re.compile('(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -176,7 +177,7 @@ def goahead(self, end):
k = self.parse_pi(i)
elif startswith("', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
- else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("", i):
+ if i + 2 == n:
+ self.handle_data("")
+ elif endtagopen.match(rawdata, i): # + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("', [('comment', '-!>')]),
+ (''
''
''
'')
expected = [
+ ('comment', 'ELEMENT br EMPTY'),
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self):
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("