Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ee32931

Browse files
committed
Issue #16089: Allow ElementTree.TreeBuilder to work again with a non-Element element_factory (fixes a regression in SimpleTAL).
1 parent 3934b61 commit ee32931

4 files changed

Lines changed: 127 additions & 31 deletions

File tree

Lib/test/test_xml_etree.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1893,10 +1893,23 @@ class TreeBuilderTest(unittest.TestCase):
18931893
sample1 = ('<!DOCTYPE html PUBLIC'
18941894
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
18951895
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1896-
'<html>text</html>')
1896+
'<html>text<div>subtext</div>tail</html>')
18971897

18981898
sample2 = '''<toplevel>sometext</toplevel>'''
18991899

1900+
def _check_sample1_element(self, e):
1901+
self.assertEqual(e.tag, 'html')
1902+
self.assertEqual(e.text, 'text')
1903+
self.assertEqual(e.tail, None)
1904+
self.assertEqual(e.attrib, {})
1905+
children = list(e)
1906+
self.assertEqual(len(children), 1)
1907+
child = children[0]
1908+
self.assertEqual(child.tag, 'div')
1909+
self.assertEqual(child.text, 'subtext')
1910+
self.assertEqual(child.tail, 'tail')
1911+
self.assertEqual(child.attrib, {})
1912+
19001913
def test_dummy_builder(self):
19011914
class BaseDummyBuilder:
19021915
def close(self):
@@ -1929,7 +1942,7 @@ def foobar(self, x):
19291942
parser.feed(self.sample1)
19301943

19311944
e = parser.close()
1932-
self.assertEqual(e.tag, 'html')
1945+
self._check_sample1_element(e)
19331946

19341947
def test_element_factory(self):
19351948
lst = []
@@ -1945,6 +1958,33 @@ def myfactory(tag, attrib):
19451958

19461959
self.assertEqual(lst, ['toplevel'])
19471960

1961+
def _check_element_factory_class(self, cls):
1962+
tb = ET.TreeBuilder(element_factory=cls)
1963+
1964+
parser = ET.XMLParser(target=tb)
1965+
parser.feed(self.sample1)
1966+
e = parser.close()
1967+
self.assertIsInstance(e, cls)
1968+
self._check_sample1_element(e)
1969+
1970+
def test_element_factory_subclass(self):
1971+
class MyElement(ET.Element):
1972+
pass
1973+
self._check_element_factory_class(MyElement)
1974+
1975+
def test_element_factory_pure_python_subclass(self):
1976+
# Mimic SimpleTAL's behaviour (issue #16089): both versions of
1977+
# TreeBuilder should be able to cope with a subclass of the
1978+
# pure Python Element class.
1979+
base = ET._Element
1980+
# Not from a C extension
1981+
self.assertEqual(base.__module__, 'xml.etree.ElementTree')
1982+
# Force some multiple inheritance with a C class to make things
1983+
# more interesting.
1984+
class MyElement(base, ValueError):
1985+
pass
1986+
self._check_element_factory_class(MyElement)
1987+
19481988
def test_doctype(self):
19491989
class DoctypeParser:
19501990
_doctype = None

Lib/xml/etree/ElementTree.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,9 @@ def insert(self, index, element):
303303
self._children.insert(index, element)
304304

305305
def _assert_is_element(self, e):
306-
if not isinstance(e, Element):
306+
# Need to refer to the actual Python implementation, not the
307+
# shadowing C implementation.
308+
if not isinstance(e, _Element):
307309
raise TypeError('expected an Element, not %s' % type(e).__name__)
308310

309311
##

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ Core and Builtins
3434
Library
3535
-------
3636

37+
- Issue #16089: Allow ElementTree.TreeBuilder to work again with a non-Element
38+
element_factory (fixes a regression in SimpleTAL).
39+
3740
- Issue #16034: Fix performance regressions in the new `bz2.BZ2File`
3841
implementation. Initial patch by Serhiy Storchaka.
3942

Modules/_elementtree.c

Lines changed: 79 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1997,8 +1997,8 @@ typedef struct {
19971997

19981998
PyObject *root; /* root node (first created node) */
19991999

2000-
ElementObject *this; /* current node */
2001-
ElementObject *last; /* most recently created node */
2000+
PyObject *this; /* current node */
2001+
PyObject *last; /* most recently created node */
20022002

20032003
PyObject *data; /* data collector (string or list), or NULL */
20042004

@@ -2030,9 +2030,9 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
20302030
t->root = NULL;
20312031

20322032
Py_INCREF(Py_None);
2033-
t->this = (ElementObject *)Py_None;
2033+
t->this = Py_None;
20342034
Py_INCREF(Py_None);
2035-
t->last = (ElementObject *)Py_None;
2035+
t->last = Py_None;
20362036

20372037
t->data = NULL;
20382038
t->element_factory = NULL;
@@ -2112,6 +2112,64 @@ treebuilder_dealloc(TreeBuilderObject *self)
21122112
Py_TYPE(self)->tp_free((PyObject *)self);
21132113
}
21142114

2115+
/* -------------------------------------------------------------------- */
2116+
/* helpers for handling of arbitrary element-like objects */
2117+
2118+
static int
2119+
treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2120+
PyObject **dest, _Py_Identifier *name)
2121+
{
2122+
if (Element_CheckExact(element)) {
2123+
Py_DECREF(JOIN_OBJ(*dest));
2124+
*dest = JOIN_SET(data, PyList_CheckExact(data));
2125+
return 0;
2126+
}
2127+
else {
2128+
PyObject *joined = list_join(data);
2129+
int r;
2130+
if (joined == NULL)
2131+
return -1;
2132+
r = _PyObject_SetAttrId(element, name, joined);
2133+
Py_DECREF(joined);
2134+
return r;
2135+
}
2136+
}
2137+
2138+
/* These two functions steal a reference to data */
2139+
static int
2140+
treebuilder_set_element_text(PyObject *element, PyObject *data)
2141+
{
2142+
_Py_IDENTIFIER(text);
2143+
return treebuilder_set_element_text_or_tail(
2144+
element, data, &((ElementObject *) element)->text, &PyId_text);
2145+
}
2146+
2147+
static int
2148+
treebuilder_set_element_tail(PyObject *element, PyObject *data)
2149+
{
2150+
_Py_IDENTIFIER(tail);
2151+
return treebuilder_set_element_text_or_tail(
2152+
element, data, &((ElementObject *) element)->tail, &PyId_tail);
2153+
}
2154+
2155+
static int
2156+
treebuilder_add_subelement(PyObject *element, PyObject *child)
2157+
{
2158+
_Py_IDENTIFIER(append);
2159+
if (Element_CheckExact(element)) {
2160+
ElementObject *elem = (ElementObject *) element;
2161+
return element_add_subelement(elem, child);
2162+
}
2163+
else {
2164+
PyObject *res;
2165+
res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2166+
if (res == NULL)
2167+
return -1;
2168+
Py_DECREF(res);
2169+
return 0;
2170+
}
2171+
}
2172+
21152173
/* -------------------------------------------------------------------- */
21162174
/* handlers */
21172175

@@ -2124,15 +2182,12 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
21242182

21252183
if (self->data) {
21262184
if (self->this == self->last) {
2127-
Py_DECREF(JOIN_OBJ(self->last->text));
2128-
self->last->text = JOIN_SET(
2129-
self->data, PyList_CheckExact(self->data)
2130-
);
2131-
} else {
2132-
Py_DECREF(JOIN_OBJ(self->last->tail));
2133-
self->last->tail = JOIN_SET(
2134-
self->data, PyList_CheckExact(self->data)
2135-
);
2185+
if (treebuilder_set_element_text(self->last, self->data))
2186+
return NULL;
2187+
}
2188+
else {
2189+
if (treebuilder_set_element_tail(self->last, self->data))
2190+
return NULL;
21362191
}
21372192
self->data = NULL;
21382193
}
@@ -2146,10 +2201,10 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
21462201
return NULL;
21472202
}
21482203

2149-
this = (PyObject*) self->this;
2204+
this = self->this;
21502205

21512206
if (this != Py_None) {
2152-
if (element_add_subelement((ElementObject*) this, node) < 0)
2207+
if (treebuilder_add_subelement(this, node) < 0)
21532208
goto error;
21542209
} else {
21552210
if (self->root) {
@@ -2175,11 +2230,11 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
21752230

21762231
Py_DECREF(this);
21772232
Py_INCREF(node);
2178-
self->this = (ElementObject*) node;
2233+
self->this = node;
21792234

21802235
Py_DECREF(self->last);
21812236
Py_INCREF(node);
2182-
self->last = (ElementObject*) node;
2237+
self->last = node;
21832238

21842239
if (self->start_event_obj) {
21852240
PyObject* res;
@@ -2203,7 +2258,7 @@ LOCAL(PyObject*)
22032258
treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
22042259
{
22052260
if (!self->data) {
2206-
if (self->last == (ElementObject*) Py_None) {
2261+
if (self->last == Py_None) {
22072262
/* ignore calls to data before the first call to start */
22082263
Py_RETURN_NONE;
22092264
}
@@ -2243,15 +2298,11 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
22432298

22442299
if (self->data) {
22452300
if (self->this == self->last) {
2246-
Py_DECREF(JOIN_OBJ(self->last->text));
2247-
self->last->text = JOIN_SET(
2248-
self->data, PyList_CheckExact(self->data)
2249-
);
2301+
if (treebuilder_set_element_text(self->last, self->data))
2302+
return NULL;
22502303
} else {
2251-
Py_DECREF(JOIN_OBJ(self->last->tail));
2252-
self->last->tail = JOIN_SET(
2253-
self->data, PyList_CheckExact(self->data)
2254-
);
2304+
if (treebuilder_set_element_tail(self->last, self->data))
2305+
return NULL;
22552306
}
22562307
self->data = NULL;
22572308
}
@@ -2271,8 +2322,8 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
22712322

22722323
Py_DECREF(self->last);
22732324

2274-
self->last = (ElementObject*) self->this;
2275-
self->this = (ElementObject*) item;
2325+
self->last = self->this;
2326+
self->this = item;
22762327

22772328
if (self->end_event_obj) {
22782329
PyObject* res;

0 commit comments

Comments
 (0)