From dd6c84cb4f74fd5773933acc71e65f6c0d215305 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Aug 2023 20:01:44 +0300 Subject: [PATCH 1/3] gh-50002: xml.dom.minidom now preserves whitespaces in attributes Also double quotes (") are now only quoted in attributes. --- Lib/test/test_minidom.py | 40 +++++++++++++++++++ Lib/xml/dom/minidom.py | 29 ++++++++++---- ...3-08-14-20-01-14.gh-issue-50002.E-bpj8.rst | 1 + ...3-08-14-20-18-59.gh-issue-81555.cWdP4a.rst | 1 + 4 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst create mode 100644 Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 699265ccadc7f9..a669d0720aab6b 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -505,6 +505,46 @@ def testWriteXML(self): dom.unlink() self.confirm(str == domstr) + def test_toxml_quote_text(self): + dom = Document() + elem = dom.appendChild(dom.createElement('elem')) + elem.appendChild(dom.createTextNode('&<>"')) + cr = elem.appendChild(dom.createElement('cr')) + cr.appendChild(dom.createTextNode('\r')) + crlf = elem.appendChild(dom.createElement('crlf')) + crlf.appendChild(dom.createTextNode('\r\n')) + lflf = elem.appendChild(dom.createElement('lflf')) + lflf.appendChild(dom.createTextNode('\n\n')) + ws = elem.appendChild(dom.createElement('ws')) + ws.appendChild(dom.createTextNode('\t\n\r ')) + domstr = dom.toxml() + dom.unlink() + self.assertEqual(domstr, '<?xml version="1.0" ?><elem>' + '&amp;&lt;&gt;"' + '<cr>\r</cr>' + '<crlf>\r\n</crlf>' + '<lflf>\n\n</lflf>' + '<ws>\t\n\r </ws></elem>') + + def test_toxml_quote_attrib(self): + dom = Document() + elem = dom.appendChild(dom.createElement('elem')) + elem.setAttribute("a", '&<>"') + elem.setAttribute("cr", "\r") + elem.setAttribute("lf", "\n") + elem.setAttribute("crlf", "\r\n") + elem.setAttribute("lflf", "\n\n") + elem.setAttribute("ws", "\t\n\r ") + domstr = dom.toxml() + dom.unlink() + 
self.assertEqual(domstr, '<?xml version="1.0" ?>' + '<elem a="&amp;&lt;&gt;&quot;" ' + 'cr="&#13;" ' + 'lf="&#10;" ' + 'crlf="&#13;&#10;" ' + 'lflf="&#10;&#10;" ' + 'ws="&#09;&#10;&#13; "/>') + def testAltNewline(self): str = '\n\n' dom = parseString(str) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index ef8a159833bbc0..219616d8908592 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -300,12 +300,27 @@ def _in_document(node): node = node.parentNode return False -def _write_data(writer, data): +def _write_data(writer, text, attr): "Writes datachars to writer." - if data: - data = data.replace("&", "&amp;").replace("<", "&lt;"). \ - replace("\"", "&quot;").replace(">", "&gt;") - writer.write(data) + if text: + # See comments in ElementTree.py about behavior and + # implementation details. + if "&" in text: + text = text.replace("&", "&amp;") + if "<" in text: + text = text.replace("<", "&lt;") + if ">" in text: + text = text.replace(">", "&gt;") + if attr: + if '"' in text: + text = text.replace('"', "&quot;") + if "\r" in text: + text = text.replace("\r", "&#13;") + if "\n" in text: + text = text.replace("\n", "&#10;") + if "\t" in text: + text = text.replace("\t", "&#09;") + writer.write(text) def _get_elements_by_tagName_helper(parent, name, rc): for node in parent.childNodes: @@ -883,7 +898,7 @@ def writexml(self, writer, indent="", addindent="", newl=""): for a_name in attrs.keys(): writer.write(" %s=\"" % a_name) - _write_data(writer, attrs[a_name].value) + _write_data(writer, attrs[a_name].value, True) writer.write("\"") if self.childNodes: writer.write(">") @@ -1112,7 +1127,7 @@ def splitText(self, offset): return newText def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, "%s%s%s" % (indent, self.data, newl)) + _write_data(writer, "%s%s%s" % (indent, self.data, newl), False) # DOM Level 3 (WD 9 April 2002) diff --git a/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst b/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst new file mode 100644 index 00000000000000..ca5c0740802eae --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst @@ -0,0 +1 @@ +:mod:`xml.dom.minidom` now preserves whitespaces in attributes. diff --git a/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst b/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst new file mode 100644 index 00000000000000..241a50f8b41c2b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst @@ -0,0 +1 @@ +:mod:`xml.dom.minidom` now only quotes ``"`` in attributes. From 6dd15a49184b75f9eb4c7858c3b2ae3c357fdae4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 15 Aug 2023 10:37:40 +0300 Subject: [PATCH 2/3] Tiny refactoring. --- Lib/xml/dom/minidom.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 219616d8908592..87420568194cdc 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -302,25 +302,26 @@ def _in_document(node): def _write_data(writer, text, attr): "Writes datachars to writer." - if text: - # See comments in ElementTree.py about behavior and - # implementation details. - if "&" in text: - text = text.replace("&", "&amp;") - if "<" in text: - text = text.replace("<", "&lt;") - if ">" in text: - text = text.replace(">", "&gt;") - if attr: - if '"' in text: - text = text.replace('"', "&quot;") - if "\r" in text: - text = text.replace("\r", "&#13;") - if "\n" in text: - text = text.replace("\n", "&#10;") - if "\t" in text: - text = text.replace("\t", "&#09;") - writer.write(text) + if not text: + return + # See the comments in ElementTree.py for behavior and + # implementation details. 
+ if "&" in text: + text = text.replace("&", "&amp;") + if "<" in text: + text = text.replace("<", "&lt;") + if ">" in text: + text = text.replace(">", "&gt;") + if attr: + if '"' in text: + text = text.replace('"', "&quot;") + if "\r" in text: + text = text.replace("\r", "&#13;") + if "\n" in text: + text = text.replace("\n", "&#10;") + if "\t" in text: + text = text.replace("\t", "&#09;") + writer.write(text) def _get_elements_by_tagName_helper(parent, name, rc): for node in parent.childNodes: From b926f3622541a197814d26d2e1a95afc7fc6767d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 23 Aug 2023 11:42:40 +0300 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: scoder --- Lib/test/test_minidom.py | 2 +- Lib/xml/dom/minidom.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index a669d0720aab6b..3ecd1af31eea77 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -543,7 +543,7 @@ def test_toxml_quote_attrib(self): 'lf="&#10;" ' 'crlf="&#13;&#10;" ' 'lflf="&#10;&#10;" ' - 'ws="&#09;&#10;&#13; "/>') + 'ws="&#9;&#10;&#13; "/>') def testAltNewline(self): str = '\n\n' diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 87420568194cdc..db51f350ea0153 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -320,7 +320,7 @@ def _write_data(writer, text, attr): if "\n" in text: text = text.replace("\n", "&#10;") if "\t" in text: - text = text.replace("\t", "&#09;") + text = text.replace("\t", "&#9;") writer.write(text) def _get_elements_by_tagName_helper(parent, name, rc):