Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 484d9a4

Browse files
committed
Patch #1309009: Fix segfault in pyexpat when the XML document is
in latin_1, but Python incorrectly assumes it is in UTF-8 format. Will backport.
1 parent aa93517 commit 484d9a4

4 files changed

Lines changed: 19 additions & 1 deletion

File tree

Lib/test/test_minidom.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,15 @@ def testEncodings():
889889
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
890890
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
891891
"testEncodings - encoding EURO SIGN")
892+
893+
# Verify that character decoding errors throw exceptions instead of crashing
894+
try:
895+
doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
896+
except UnicodeDecodeError:
897+
pass
898+
else:
899+
print 'parsing with bad encoding should raise a UnicodeDecodeError'
900+
892901
doc.unlink()
893902

894903
class UserDataHandler:

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ Flemming Kj
305305
Jiba
306306
Orjan Johansen
307307
Simon Johnston
308+
Evan Jones
308309
Richard Jones
309310
Irmen de Jong
310311
Lucas de Jonge

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ present).
153153
Extension Modules
154154
-----------------
155155

156+
- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1,
157+
but Python incorrectly assumes it is in UTF-8 format
158+
156159
- Fix parse errors in the readline module when compiling without threads.
157160

158161
- Patch #1288833: Removed thread lock from socket.getaddrinfo on

Modules/pyexpat.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str)
417417
{
418418
PyObject *result = STRING_CONV_FUNC(str);
419419
PyObject *value;
420+
/* result can be NULL if the unicode conversion failed. */
421+
if (!result)
422+
return result;
420423
if (!self->intern)
421424
return result;
422425
value = PyDict_GetItem(self->intern, result);
@@ -572,7 +575,9 @@ my_StartElementHandler(void *userData,
572575
Py_DECREF(v);
573576
}
574577
}
575-
args = Py_BuildValue("(NN)", string_intern(self, name), container);
578+
args = string_intern(self, name);
579+
if (args != NULL)
580+
args = Py_BuildValue("(NN)", args, container);
576581
if (args == NULL) {
577582
Py_DECREF(container);
578583
return;

0 commit comments

Comments
 (0)