Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7d650ca

Browse files
committed
Implement the encoding argument for toxml and toprettyxml.
Document toprettyxml.
1 parent 2ebfd09 commit 7d650ca

5 files changed

Lines changed: 68 additions & 9 deletions

File tree

Doc/lib/xmldomminidom.tex

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,45 @@ \subsection{DOM objects \label{dom-objects}}
121121
Write XML to the writer object. The writer should have a
122122
\method{write()} method which matches that of the file object
123123
interface.
124+
125+
\versionadded[To support pretty output, new keyword parameters indent,
126+
addindent, and newl have been added]{2.1}
127+
128+
\versionadded[For the \class{Document} node, an additional keyword
129+
argument encoding can be used to specify the encoding field of the XML
130+
header]{2.3}
131+
124132
\end{methoddesc}
125133

126-
\begin{methoddesc}{toxml}{}
134+
\begin{methoddesc}{toxml}{\optional{encoding}}
127135
Return the XML that the DOM represents as a string.
136+
137+
\versionadded[the \var{encoding} argument]{2.3}
138+
139+
With no argument, the XML header does not specify an encoding, and the
140+
result is a Unicode string if the default encoding cannot represent all
141+
characters in the document. Encoding this string in an encoding other
142+
than UTF-8 is likely incorrect, since UTF-8 is the default encoding of
143+
XML.
144+
145+
With an explicit \var{encoding} argument, the result is a byte string
146+
in the specified encoding. It is recommended that this argument is
147+
always specified. To avoid UnicodeError exceptions in case of
148+
unrepresentable text data, the encoding argument should be specified
149+
as "utf-8".
150+
151+
\end{methoddesc}
152+
153+
\begin{methoddesc}{toprettyxml}{\optional{indent\optional{, newl\optional{, encoding}}}}
154+
155+
Return a pretty-printed version of the document. \var{indent} specifies
156+
the indentation string and defaults to a tabulator; \var{newl} specifies
157+
the string emitted at the end of each line and defaults to \code{\e n}.
158+
159+
\versionadded{2.1}
160+
161+
\versionadded[the encoding argument; see \method{toxml}]{2.3}
162+
128163
\end{methoddesc}
129164

130165
The following standard DOM methods have special considerations with

Lib/test/output/test_minidom

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ Passed assertion: len(Node.allnodes) == 0
9898
Passed Test
9999
Test Succeeded testElementReprAndStr
100100
Passed assertion: len(Node.allnodes) == 0
101+
Passed testEncodings - encoding EURO SIGN
102+
Test Succeeded testEncodings
103+
Passed assertion: len(Node.allnodes) == 0
101104
Test Succeeded testFirstChild
102105
Passed assertion: len(Node.allnodes) == 0
103106
Test Succeeded testGetAttrLength

Lib/test/test_minidom.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,14 @@ def testSAX2DOM():
606606

607607
doc.unlink()
608608

609+
def testEncodings():
610+
doc = parseString('<foo>&#x20ac;</foo>')
611+
confirm(doc.toxml() == u'<?xml version="1.0" ?>\n<foo>\u20ac</foo>'
612+
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?>\n<foo>\xe2\x82\xac</foo>'
613+
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?>\n<foo>\xa4</foo>',
614+
"testEncodings - encoding EURO SIGN")
615+
doc.unlink()
616+
609617
# --- MAIN PROGRAM
610618

611619
names = globals().keys()

Lib/xml/dom/minidom.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,22 @@ def __init__(self):
6565
def __nonzero__(self):
6666
return 1
6767

68-
def toxml(self):
69-
writer = _get_StringIO()
70-
self.writexml(writer)
71-
return writer.getvalue()
68+
def toxml(self, encoding = None):
69+
return self.toprettyxml("", "", encoding)
7270

73-
def toprettyxml(self, indent="\t", newl="\n"):
71+
def toprettyxml(self, indent="\t", newl="\n", encoding = None):
7472
# indent = the indentation string to prepend, per level
7573
# newl = the newline string to append
7674
writer = _get_StringIO()
77-
self.writexml(writer, "", indent, newl)
75+
if encoding is not None:
76+
import codecs
77+
# Can't use codecs.getwriter to preserve 2.0 compatibility
78+
writer = codecs.lookup(encoding)[3](writer)
79+
if self.nodeType == Node.DOCUMENT_NODE:
80+
# Can pass encoding only to document, to put it into XML header
81+
self.writexml(writer, "", indent, newl, encoding)
82+
else:
83+
self.writexml(writer, "", indent, newl)
7884
return writer.getvalue()
7985

8086
def hasChildNodes(self):
@@ -934,8 +940,12 @@ def getElementsByTagNameNS(self, namespaceURI, localName):
934940
return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
935941
NodeList())
936942

937-
def writexml(self, writer, indent="", addindent="", newl=""):
938-
writer.write('<?xml version="1.0" ?>\n')
943+
def writexml(self, writer, indent="", addindent="", newl="",
944+
encoding = None):
945+
if encoding is None:
946+
writer.write('<?xml version="1.0" ?>\n')
947+
else:
948+
writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
939949
for node in self.childNodes:
940950
node.writexml(writer, indent, addindent, newl)
941951

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,9 @@ Extension modules
170170

171171
Library
172172

173+
- xml.dom.minidom.toxml and toprettyxml now take an optional encoding
174+
argument.
175+
173176
- Some fixes in the copy module: when an object is copied through its
174177
__reduce__ method, there was no check for a __setstate__ method on
175178
the result [SF patch 565085]; deepcopy should treat instances of

0 commit comments

Comments
 (0)