Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 698bdb2

Browse files
committed
Issue #16076: make _elementtree.Element pickle-able in a way that is compatible
with the Python version of the class. Patch by Daniel Shahaf.
1 parent 0dceb56 commit 698bdb2

3 files changed

Lines changed: 239 additions & 23 deletions

File tree

Lib/test/test_xml_etree.py

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,20 @@
1616

1717
import html
1818
import io
19+
import operator
1920
import pickle
2021
import sys
2122
import unittest
2223
import weakref
2324

25+
from itertools import product
2426
from test import support
2527
from test.support import TESTFN, findfile, unlink, import_fresh_module, gc_collect
2628

29+
# pyET is the pure-Python implementation.
30+
#
31+
# ET is pyET in test_xml_etree and is the C accelerated version in
32+
# test_xml_etree_c.
2733
pyET = None
2834
ET = None
2935

@@ -171,6 +177,38 @@ def check_element(element):
171177
for elem in element:
172178
check_element(elem)
173179

180+
class ElementTestCase:
    """Mixin providing pickling and comparison helpers for Element tests."""

    @classmethod
    def setUpClass(cls):
        # A set, so pyET and ET collapse into a single entry when they are
        # the same module object.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader):
        """Pickle *obj* and unpickle it again, swapping sys.modules[name].

        sys.modules[name] is set to *dumper* while dumping and to *loader*
        while loading; the entry that was there before the call is always
        put back.  Returns the unpickled object.  A pickle.PicklingError is
        re-raised as support.TestFailed, chained to the original error.
        """
        original_module = sys.modules[name]
        try:
            sys.modules[name] = dumper
            payload = pickle.dumps(obj)
            sys.modules[name] = loader
            restored = pickle.loads(payload)
        except pickle.PicklingError as exc:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            message = "Failed to round-trip %r from %r to %r" % (
                obj,
                labels.get(dumper, dumper),
                labels.get(loader, loader),
            )
            raise support.TestFailed(message) from exc
        finally:
            sys.modules[name] = original_module
        return restored

    def assertEqualElements(self, alice, bob):
        """Assert that two element trees are recursively equal.

        Both arguments must be Element instances from either ET or pyET.
        They must have the same number of children, pairwise-equal children,
        and equal tag, tail, text and attrib.
        """
        self.assertIsInstance(alice, (ET.Element, pyET.Element))
        self.assertIsInstance(bob, (ET.Element, pyET.Element))

        alice_children = list(alice)
        bob_children = list(bob)
        self.assertEqual(len(alice_children), len(bob_children))
        for left, right in zip(alice_children, bob_children):
            self.assertEqualElements(left, right)

        properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(properties(alice), properties(bob))
211+
174212
# --------------------------------------------------------------------
175213
# element tree tests
176214

@@ -1715,7 +1753,7 @@ def check_issue10777():
17151753
# --------------------------------------------------------------------
17161754

17171755

1718-
class BasicElementTest(unittest.TestCase):
1756+
class BasicElementTest(ElementTestCase, unittest.TestCase):
17191757
def test_augmentation_type_errors(self):
17201758
e = ET.Element('joe')
17211759
self.assertRaises(TypeError, e.append, 'b')
@@ -1775,19 +1813,22 @@ def test_get_keyword_args(self):
17751813
self.assertEqual(e1.get('w', default=7), 7)
17761814

17771815
def test_pickle(self):
    """Round-trip a small tree through pickle for every dumper/loader pair."""
    # issue #16076: the C implementation wasn't pickleable.
    for dumper, loader in product(self.modules, repeat=2):
        # Build: <foo bar=42>text<child><grandchild attr=.../></child>
        #        <child/></foo>tail
        root = dumper.Element('foo', bar=42)
        root.text = "text goes here"
        root.tail = "opposite of head"
        first_child = dumper.SubElement(root, 'child')
        first_child.append(dumper.Element('grandchild'))
        root.append(dumper.Element('child'))
        grandchild = root.findall('.//grandchild')[0]
        grandchild.set('attr', 'other value')

        restored = self.pickleRoundTrip(root, 'xml.etree.ElementTree',
                                        dumper, loader)

        self.assertEqual(restored.tag, 'foo')
        self.assertEqual(restored.attrib['bar'], 42)
        self.assertEqual(len(restored), 2)
        self.assertEqualElements(root, restored)
17911832

17921833
class ElementTreeTest(unittest.TestCase):
17931834
def test_istype(self):
@@ -2433,7 +2474,7 @@ def test_issue14818(self):
24332474
class NoAcceleratorTest(unittest.TestCase):
24342475
def setUp(self):
    """Skip the test unless the pure-Python module (pyET) has been loaded."""
    if pyET:
        return
    raise unittest.SkipTest('only for the Python version')
24372478

24382479
# Test that the C accelerator was not imported for pyET
24392480
def test_correct_import_pyET(self):
@@ -2486,10 +2527,10 @@ def __exit__(self, *args):
24862527
def test_main(module=None):
24872528
# When invoked without a module, runs the Python ET tests by loading pyET.
24882529
# Otherwise, uses the given module as the ET.
2530+
global pyET
2531+
pyET = import_fresh_module('xml.etree.ElementTree',
2532+
blocked=['_elementtree'])
24892533
if module is None:
2490-
global pyET
2491-
pyET = import_fresh_module('xml.etree.ElementTree',
2492-
blocked=['_elementtree'])
24932534
module = pyET
24942535

24952536
global ET
@@ -2509,7 +2550,7 @@ def test_main(module=None):
25092550
# These tests will only run for the pure-Python version that doesn't import
25102551
# _elementtree. We can't use skipUnless here, because pyET is filled in only
25112552
# after the module is loaded.
2512-
if pyET:
2553+
if pyET is not ET:
25132554
test_classes.extend([
25142555
NoAcceleratorTest,
25152556
])
@@ -2518,7 +2559,7 @@ def test_main(module=None):
25182559
support.run_unittest(*test_classes)
25192560

25202561
# XXX the C module should give the same warnings as the Python module
2521-
with CleanContext(quiet=(module is not pyET)):
2562+
with CleanContext(quiet=(pyET is not ET)):
25222563
support.run_doctest(sys.modules[__name__], verbosity=True)
25232564
finally:
25242565
# don't interfere with subsequent tests

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,7 @@ Jerry Seutter
10801080
Pete Sevander
10811081
Denis Severson
10821082
Ian Seyer
1083+
Daniel Shahaf
10831084
Ha Shao
10841085
Mark Shannon
10851086
Richard Shapiro

Modules/_elementtree.c

Lines changed: 177 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,176 @@ element_sizeof(PyObject* _self, PyObject* args)
814814
return PyLong_FromSsize_t(result);
815815
}
816816

817+
/* dict keys for getstate/setstate. */
818+
#define PICKLED_TAG "tag"
819+
#define PICKLED_CHILDREN "_children"
820+
#define PICKLED_ATTRIB "attrib"
821+
#define PICKLED_TAIL "tail"
822+
#define PICKLED_TEXT "text"
823+
824+
/* __getstate__ returns a fabricated instance dict as in the pure-Python
825+
* Element implementation, for interoperability/interchangeability. This
826+
* makes the pure-Python implementation details an API, but (a) there aren't
827+
* any unnecessary structures there; and (b) it buys compatibility with 3.2
828+
* pickles. See issue #16076.
829+
*/
830+
static PyObject *
831+
element_getstate(ElementObject *self)
832+
{
833+
int i, noattrib;
834+
PyObject *instancedict = NULL, *children;
835+
836+
/* Build a list of children. */
837+
children = PyList_New(self->extra ? self->extra->length : 0);
838+
if (!children)
839+
return NULL;
840+
for (i = 0; i < PyList_GET_SIZE(children); i++) {
841+
PyObject *child = self->extra->children[i];
842+
Py_INCREF(child);
843+
PyList_SET_ITEM(children, i, child);
844+
}
845+
846+
/* Construct the state object. */
847+
noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
848+
if (noattrib)
849+
instancedict = Py_BuildValue("{sOsOs{}sOsO}",
850+
PICKLED_TAG, self->tag,
851+
PICKLED_CHILDREN, children,
852+
PICKLED_ATTRIB,
853+
PICKLED_TEXT, self->text,
854+
PICKLED_TAIL, self->tail);
855+
else
856+
instancedict = Py_BuildValue("{sOsOsOsOsO}",
857+
PICKLED_TAG, self->tag,
858+
PICKLED_CHILDREN, children,
859+
PICKLED_ATTRIB, self->extra->attrib,
860+
PICKLED_TEXT, self->text,
861+
PICKLED_TAIL, self->tail);
862+
if (instancedict)
863+
return instancedict;
864+
else {
865+
for (i = 0; i < PyList_GET_SIZE(children); i++)
866+
Py_DECREF(PyList_GET_ITEM(children, i));
867+
Py_DECREF(children);
868+
869+
return NULL;
870+
}
871+
}
872+
873+
static PyObject *
874+
element_setstate_from_attributes(ElementObject *self,
875+
PyObject *tag,
876+
PyObject *attrib,
877+
PyObject *text,
878+
PyObject *tail,
879+
PyObject *children)
880+
{
881+
Py_ssize_t i, nchildren;
882+
883+
if (!tag) {
884+
PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
885+
return NULL;
886+
}
887+
if (!text) {
888+
Py_INCREF(Py_None);
889+
text = Py_None;
890+
}
891+
if (!tail) {
892+
Py_INCREF(Py_None);
893+
tail = Py_None;
894+
}
895+
896+
Py_CLEAR(self->tag);
897+
self->tag = tag;
898+
Py_INCREF(self->tag);
899+
900+
Py_CLEAR(self->text);
901+
self->text = text;
902+
Py_INCREF(self->text);
903+
904+
Py_CLEAR(self->tail);
905+
self->tail = tail;
906+
Py_INCREF(self->tail);
907+
908+
/* Handle ATTRIB and CHILDREN. */
909+
if (!children && !attrib)
910+
Py_RETURN_NONE;
911+
912+
/* Compute 'nchildren'. */
913+
if (children) {
914+
if (!PyList_Check(children)) {
915+
PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
916+
return NULL;
917+
}
918+
nchildren = PyList_Size(children);
919+
}
920+
else {
921+
nchildren = 0;
922+
}
923+
924+
/* Allocate 'extra'. */
925+
if (element_resize(self, nchildren)) {
926+
return NULL;
927+
}
928+
assert(self->extra && self->extra->allocated >= nchildren);
929+
930+
/* Copy children */
931+
for (i = 0; i < nchildren; i++) {
932+
self->extra->children[i] = PyList_GET_ITEM(children, i);
933+
Py_INCREF(self->extra->children[i]);
934+
}
935+
936+
self->extra->length = nchildren;
937+
self->extra->allocated = nchildren;
938+
939+
/* Stash attrib. */
940+
if (attrib) {
941+
Py_CLEAR(self->extra->attrib);
942+
self->extra->attrib = attrib;
943+
Py_INCREF(attrib);
944+
}
945+
946+
Py_RETURN_NONE;
947+
}
948+
949+
/* __setstate__ for Element instance from the Python implementation.
950+
* 'state' should be the instance dict.
951+
*/
952+
static PyObject *
953+
element_setstate_from_Python(ElementObject *self, PyObject *state)
954+
{
955+
static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
956+
PICKLED_TAIL, PICKLED_CHILDREN, 0};
957+
PyObject *args;
958+
PyObject *tag, *attrib, *text, *tail, *children;
959+
int error;
960+
961+
/* More instance dict members than we know to handle? */
962+
tag = attrib = text = tail = children = NULL;
963+
args = PyTuple_New(0);
964+
error = ! PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
965+
&attrib, &text, &tail, &children);
966+
Py_DECREF(args);
967+
if (error)
968+
return NULL;
969+
else
970+
return element_setstate_from_attributes(self, tag, attrib, text,
971+
tail, children);
972+
}
973+
974+
static PyObject *
975+
element_setstate(ElementObject *self, PyObject *state)
976+
{
977+
if (!PyDict_CheckExact(state)) {
978+
PyErr_Format(PyExc_TypeError,
979+
"Don't know how to unpickle \"%.200R\" as an Element",
980+
state);
981+
return NULL;
982+
}
983+
else
984+
return element_setstate_from_Python(self, state);
985+
}
986+
817987
LOCAL(int)
818988
checkpath(PyObject* tag)
819989
{
@@ -1587,6 +1757,8 @@ static PyMethodDef element_methods[] = {
15871757
{"__copy__", (PyCFunction) element_copy, METH_VARARGS},
15881758
{"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
15891759
{"__sizeof__", element_sizeof, METH_NOARGS},
1760+
{"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1761+
{"__setstate__", (PyCFunction)element_setstate, METH_O},
15901762

15911763
{NULL, NULL}
15921764
};
@@ -1691,7 +1863,7 @@ static PyMappingMethods element_as_mapping = {
16911863

16921864
static PyTypeObject Element_Type = {
16931865
PyVarObject_HEAD_INIT(NULL, 0)
1694-
"Element", sizeof(ElementObject), 0,
1866+
"xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
16951867
/* methods */
16961868
(destructor)element_dealloc, /* tp_dealloc */
16971869
0, /* tp_print */
@@ -1913,6 +2085,8 @@ elementiter_next(ElementIterObject *it)
19132085

19142086
static PyTypeObject ElementIter_Type = {
19152087
PyVarObject_HEAD_INIT(NULL, 0)
2088+
/* Using the module's name since the pure-Python implementation does not
2089+
have such a type. */
19162090
"_elementtree._element_iterator", /* tp_name */
19172091
sizeof(ElementIterObject), /* tp_basicsize */
19182092
0, /* tp_itemsize */
@@ -2458,7 +2632,7 @@ static PyMethodDef treebuilder_methods[] = {
24582632

24592633
static PyTypeObject TreeBuilder_Type = {
24602634
PyVarObject_HEAD_INIT(NULL, 0)
2461-
"TreeBuilder", sizeof(TreeBuilderObject), 0,
2635+
"xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
24622636
/* methods */
24632637
(destructor)treebuilder_dealloc, /* tp_dealloc */
24642638
0, /* tp_print */
@@ -3420,7 +3594,7 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
34203594

34213595
static PyTypeObject XMLParser_Type = {
34223596
PyVarObject_HEAD_INIT(NULL, 0)
3423-
"XMLParser", sizeof(XMLParserObject), 0,
3597+
"xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
34243598
/* methods */
34253599
(destructor)xmlparser_dealloc, /* tp_dealloc */
34263600
0, /* tp_print */

0 commit comments

Comments
 (0)