diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 1e30956fe24f83..ba416e7fa6e3fe 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -27,6 +27,7 @@
attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -195,7 +196,7 @@ def goahead(self, end):
k = self.parse_pi(i)
elif startswith("', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
- else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("</", i):
+ if i + 2 == n:
+ self.handle_data("</")
+ elif endtagopen.match(rawdata, i): # </ + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("', [('comment', '-!>')]),
+ (''
''
''
@@ -604,6 +640,7 @@ def test_bogus_comments(self):
'' # required '[' after CDATA
)
expected = [
+ ('comment', 'ELEMENT br EMPTY'),
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
@@ -684,6 +721,26 @@ def test_convert_charrefs_dropped_text(self):
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("