diff --git a/Lib/html/parser.py b/Lib/html/parser.py index ba416e7fa6e3fe..99aebc19d4a2e3 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -298,7 +298,11 @@ def parse_html_declaration(self, i): # this case is actually already handled in goahead() return self.parse_comment(i) elif rawdata[i:i+9] == '') + if j < 0: + return -1 + self.unknown_decl(rawdata[i+3: j]) + return j + 3 elif rawdata[i:i+9].lower() == ' gtpos = rawdata.find('>', i+9) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 65a4bee72b9775..b75b4c711ccac5 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -686,27 +686,27 @@ def test_broken_condcoms(self): ] self._run_check(html, expected) - def test_cdata_declarations(self): - # More tests should be added. See also "8.2.4.42. Markup - # declaration open state", "8.2.4.69. CDATA section state", - # and issue 32876 - html = ('') - expected = [('unknown decl', 'CDATA[just some plain text')] - self._run_check(html, expected) - - def test_cdata_declarations_multiline(self): - html = (' b) {' - ' printf("[How?]");' - ' }' - ']]>') - expected = [ - ('starttag', 'code', []), - ('unknown decl', - 'CDATA[ if (a < b && a > b) { ' - 'printf("[How?]"); }'), - ('endtag', 'code') - ] + @support.subTests('content', [ + 'just some plain text', + '', + '¬-an-entity-ref;', + "", + '', + '[[I have many brackets]]', + 'I have a > in the middle', + 'I have a ]] in the middle', + '] ]>', + ']] >', + ('\n' + ' if (a < b && a > b) {\n' + ' printf("[How?]");\n' + ' }\n'), + ]) + def test_cdata_section(self, content): + # See "13.2.5.42 Markup declaration open state", + # "13.2.5.69 CDATA section state", and issue bpo-32876. 
+ html = f'<![CDATA[{content}]]>' + expected = [('unknown decl', 'CDATA[' + content)] self._run_check(html, expected) def test_convert_charrefs_dropped_text(self): diff --git a/Misc/NEWS.d/next/Library/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst b/Misc/NEWS.d/next/Library/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst new file mode 100644 index 00000000000000..7a07e8535bb497 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst @@ -0,0 +1,2 @@ +Fix CDATA section parsing in :class:`html.parser.HTMLParser`: ``] ]>`` and +``]] >`` no longer end the CDATA section.