Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8a80502

Browse files
committed
Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me.
1 parent b674dcf commit 8a80502

3 files changed

Lines changed: 18 additions & 16 deletions

File tree

Doc/library/xml.dom.minidom.rst

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and
147147
the DOM node.
148148

149149
With an explicit *encoding* [1]_ argument, the result is a byte
150-
string in the specified encoding. It is recommended that you
151-
always specify an encoding; you may use any encoding you like, but
152-
an argument of "utf-8" is the most common choice, avoiding
153-
:exc:`UnicodeError` exceptions in case of unrepresentable text
154-
data.
155-
150+
string in the specified encoding.
156151
With no *encoding* argument, the result is a Unicode string, and the
157152
XML declaration in the resulting string does not specify an
158153
encoding. Encoding this string in an encoding other than UTF-8 is

Lib/test/test_minidom.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,11 @@ def testEncodings(self):
10671067
b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>')
10681068
self.assertEqual(doc.toxml('iso-8859-15'),
10691069
b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>')
1070+
self.assertEqual(doc.toxml('us-ascii'),
1071+
b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>')
1072+
self.assertEqual(doc.toxml('utf-16'),
1073+
'<?xml version="1.0" encoding="utf-16"?>'
1074+
'<foo>\u20ac</foo>'.encode('utf-16'))
10701075

10711076
# Verify that character decoding errors throw exceptions instead
10721077
# of crashing

Lib/xml/dom/minidom.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* SAX 2 namespaces
1515
"""
1616

17-
import codecs
1817
import io
1918
import xml.dom
2019

@@ -47,19 +46,22 @@ def toxml(self, encoding=None):
4746
return self.toprettyxml("", "", encoding)
4847

4948
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
50-
# indent = the indentation string to prepend, per level
51-
# newl = the newline string to append
52-
use_encoding = "utf-8" if encoding is None else encoding
53-
writer = codecs.getwriter(use_encoding)(io.BytesIO())
49+
if encoding is None:
50+
writer = io.StringIO()
51+
else:
52+
writer = io.TextIOWrapper(io.BytesIO(),
53+
encoding=encoding,
54+
errors="xmlcharrefreplace",
55+
newline='\n')
5456
if self.nodeType == Node.DOCUMENT_NODE:
5557
# Can pass encoding only to document, to put it into XML header
5658
self.writexml(writer, "", indent, newl, encoding)
5759
else:
5860
self.writexml(writer, "", indent, newl)
5961
if encoding is None:
60-
return writer.stream.getvalue().decode(use_encoding)
62+
return writer.getvalue()
6163
else:
62-
return writer.stream.getvalue()
64+
return writer.detach().getvalue()
6365

6466
def hasChildNodes(self):
6567
return bool(self.childNodes)
@@ -1788,12 +1790,12 @@ def importNode(self, node, deep):
17881790
raise xml.dom.NotSupportedErr("cannot import document type nodes")
17891791
return _clone_node(node, deep, self)
17901792

1791-
def writexml(self, writer, indent="", addindent="", newl="",
1792-
encoding = None):
1793+
def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
17931794
if encoding is None:
17941795
writer.write('<?xml version="1.0" ?>'+newl)
17951796
else:
1796-
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
1797+
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
1798+
encoding, newl))
17971799
for node in self.childNodes:
17981800
node.writexml(writer, indent, addindent, newl)
17991801

0 commit comments

Comments
 (0)