Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5431928

Browse files
committed
Merged revisions 78123 via svnmerge from
svn+ssh://[email protected]/python/branches/py3k ........ r78123 | antoine.pitrou | 2010-02-09 17:51:16 +0100 (mar., 09 févr. 2010) | 5 lines Issue #6233: ElementTree failed converting unicode characters to XML entities when they couldn't be represented in the requested output encoding. Patch by Jerry Chen. ........
1 parent 0f36573 commit 5431928

4 files changed

Lines changed: 25 additions & 6 deletions

File tree

Lib/test/test_xml_etree.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,17 @@ def check_encoding(ET, encoding):
210210
"""
211211
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
212212

213+
def check_issue6233():
214+
"""
215+
>>> from xml.etree import ElementTree as ET
216+
217+
>>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>")
218+
>>> ET.tostring(e, 'ascii')
219+
b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
220+
>>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding
221+
>>> ET.tostring(e, 'ascii')
222+
b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
223+
"""
213224

214225
#
215226
# xinclude tests (samples from appendix C of the xinclude specification)

Lib/xml/etree/ElementTree.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -662,9 +662,9 @@ def _write(self, file, node, encoding, namespaces):
662662
# write XML to file
663663
tag = node.tag
664664
if tag is Comment:
665-
file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
665+
file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->")
666666
elif tag is ProcessingInstruction:
667-
file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
667+
file.write(b"<?" + _encode_cdata(node.text, encoding) + b"?>")
668668
else:
669669
items = list(node.items())
670670
xmlns_items = [] # new namespaces in this scope
@@ -696,7 +696,7 @@ def _write(self, file, node, encoding, namespaces):
696696
if node.text or len(node):
697697
file.write(_encode(">", encoding))
698698
if node.text:
699-
file.write(_encode(_escape_cdata(node.text), encoding))
699+
file.write(_encode_cdata(node.text, encoding))
700700
for n in node:
701701
self._write(file, n, encoding, namespaces)
702702
file.write(_encode("</" + tag + ">", encoding))
@@ -705,7 +705,7 @@ def _write(self, file, node, encoding, namespaces):
705705
for k, v in xmlns_items:
706706
del namespaces[v]
707707
if node.tail:
708-
file.write(_encode(_escape_cdata(node.tail), encoding))
708+
file.write(_encode_cdata(node.tail, encoding))
709709

710710
# --------------------------------------------------------------------
711711
# helpers
@@ -788,13 +788,16 @@ def escape_entities(m, map=_escape_map):
788788
# the following functions assume an ascii-compatible encoding
789789
# (or "utf-16")
790790

791-
def _escape_cdata(text):
791+
def _encode_cdata(text, encoding):
792792
# escape character data
793793
try:
794794
text = text.replace("&", "&amp;")
795795
text = text.replace("<", "&lt;")
796796
text = text.replace(">", "&gt;")
797-
return text
797+
if encoding:
798+
return text.encode(encoding, "xmlcharrefreplace")
799+
else:
800+
return text
798801
except (TypeError, AttributeError):
799802
_raise_serialization_error(text)
800803

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ Greg Chapman
126126
Brad Chapman
127127
David Chaum
128128
Nicolas Chauvat
129+
Jerry Chen
129130
Michael Chermside
130131
Albert Chin-A-Young
131132
Adal Chiriliuc

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ Core and Builtins
7979
Library
8080
-------
8181

82+
- Issue #6233: ElementTree failed converting unicode characters to XML
83+
entities when they couldn't be represented in the requested output
84+
encoding. Patch by Jerry Chen.
85+
8286
- Issue #4772: Raise a ValueError when an unknown Bluetooth protocol is
8387
specified, rather than fall through to AF_PACKET (in the `socket` module).
8488
Also, raise ValueError rather than TypeError when an unknown TIPC address

0 commit comments

Comments
 (0)