diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py
index 3ad7e279960f7e..bb0e62fd60bdff 100644
--- a/Lib/_markupbase.py
+++ b/Lib/_markupbase.py
@@ -10,12 +10,12 @@
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
_commentclose = re.compile(r'--\s*>')
-_markedsectionclose = re.compile(r']\s*]\s*>')
+_markedsectionclose = re.compile(r'](\s*]\s*>)')
# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
-_msmarkedsectionclose = re.compile(r']\s*>')
+_msmarkedsectionclose = re.compile(r'(]\s*>)')
del re
@@ -157,7 +157,7 @@ def parse_marked_section(self, i, report=1):
if not match:
return -1
if report:
- j = match.start(0)
+ j = match.start(1)
self.unknown_decl(rawdata[i+3: j])
return match.end(0)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 12917755a56017..54e212b4800dfa 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -315,6 +315,14 @@ def get_events(self):
("endtag", element_lower)],
collector=Collector(convert_charrefs=False))
+ def test_cdata_decl(self):
+ self._run_check('',
+ [('starttag', 'math', []),
+ ('starttag', 'ms', []),
+ ('unknown decl', 'CDATA[x"
''
diff --git a/Misc/NEWS.d/next/Library/2021-03-06-13-23-34.bpo-0.jzVmiO.rst b/Misc/NEWS.d/next/Library/2021-03-06-13-23-34.bpo-0.jzVmiO.rst
new file mode 100644
index 00000000000000..ede8ce0b5377e4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-03-06-13-23-34.bpo-0.jzVmiO.rst
@@ -0,0 +1 @@
+Fix :mod:`html.parser` dropping the closing square bracket of CDATA content when passing it to the ``unknown_decl`` method.
\ No newline at end of file