Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 48d358b

Browse files
committed
Issue #14007: implemented the 'element_factory' feature of TreeBuilder in
_elementtree, with a test.
1 parent a0cf90e commit 48d358b

3 files changed

Lines changed: 90 additions & 37 deletions

File tree

Doc/library/xml.etree.elementtree.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -716,9 +716,9 @@ TreeBuilder Objects
716716
Generic element structure builder. This builder converts a sequence of
717717
start, data, and end method calls to a well-formed element structure. You
718718
can use this class to build an element structure using a custom XML parser,
719-
or a parser for some other XML-like format. The *element_factory* is called
720-
to create new :class:`Element` instances when given.
721-
719+
or a parser for some other XML-like format. *element_factory*, when given,
720+
must be a callable accepting two positional arguments: a tag and
721+
a dict of attributes. It is expected to return a new element instance.
722722

723723
.. method:: close()
724724

Lib/test/test_xml_etree.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,6 +1959,8 @@ class TreeBuilderTest(unittest.TestCase):
19591959
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
19601960
'<html>text</html>')
19611961

1962+
sample2 = '''<toplevel>sometext</toplevel>'''
1963+
19621964
def test_dummy_builder(self):
19631965
class BaseDummyBuilder:
19641966
def close(self):
@@ -1993,11 +1995,19 @@ def foobar(self, x):
19931995
e = parser.close()
19941996
self.assertEqual(e.tag, 'html')
19951997

1996-
# XXX in _elementtree, the constructor of TreeBuilder expects no
1997-
# arguments
1998-
@unittest.expectedFailure
19991998
def test_element_factory(self):
2000-
tb = ET.TreeBuilder(element_factory=lambda: ET.Element())
1999+
lst = []
2000+
def myfactory(tag, attrib):
2001+
nonlocal lst
2002+
lst.append(tag)
2003+
return ET.Element(tag, attrib)
2004+
2005+
tb = ET.TreeBuilder(element_factory=myfactory)
2006+
parser = ET.XMLParser(target=tb)
2007+
parser.feed(self.sample2)
2008+
parser.close()
2009+
2010+
self.assertEqual(lst, ['toplevel'])
20012011

20022012
@unittest.expectedFailure # XXX issue 14007 with C ElementTree
20032013
def test_doctype(self):

Modules/_elementtree.c

Lines changed: 73 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@ list_join(PyObject* list)
191191
return result;
192192
}
193193

194+
/* Is the given object an empty dictionary?
195+
*/
196+
static int
197+
is_empty_dict(PyObject *obj)
198+
{
199+
return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
200+
}
201+
202+
194203
/* -------------------------------------------------------------------- */
195204
/* the Element type */
196205

@@ -297,14 +306,9 @@ create_new_element(PyObject* tag, PyObject* attrib)
297306
self = PyObject_GC_New(ElementObject, &Element_Type);
298307
if (self == NULL)
299308
return NULL;
300-
301-
/* use None for empty dictionaries */
302-
if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303-
attrib = Py_None;
304-
305309
self->extra = NULL;
306310

307-
if (attrib != Py_None) {
311+
if (attrib != Py_None && !is_empty_dict(attrib)) {
308312
if (create_extra(self, attrib) < 0) {
309313
PyObject_Del(self);
310314
return NULL;
@@ -416,22 +420,14 @@ element_init(PyObject *self, PyObject *args, PyObject *kwds)
416420

417421
self_elem = (ElementObject *)self;
418422

419-
/* Use None for empty dictionaries */
420-
if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
421-
Py_INCREF(Py_None);
422-
attrib = Py_None;
423-
}
424-
425-
if (attrib != Py_None) {
423+
if (attrib != Py_None && !is_empty_dict(attrib)) {
426424
if (create_extra(self_elem, attrib) < 0) {
427425
PyObject_Del(self_elem);
428426
return -1;
429427
}
430428
}
431429

432-
/* If create_extra needed attrib, it took a reference to it, so we can
433-
* release ours anyway.
434-
*/
430+
/* We own a reference to attrib here and it's no longer needed. */
435431
Py_DECREF(attrib);
436432

437433
/* Replace the objects already pointed to by tag, text and tail. */
@@ -1813,6 +1809,8 @@ typedef struct {
18131809
PyObject *stack; /* element stack */
18141810
Py_ssize_t index; /* current stack size (0 means empty) */
18151811

1812+
PyObject *element_factory;
1813+
18161814
/* element tracing */
18171815
PyObject *events; /* list of events, or NULL if not collecting */
18181816
PyObject *start_event_obj; /* event objects (NULL to ignore) */
@@ -1841,6 +1839,7 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
18411839
t->last = (ElementObject *)Py_None;
18421840

18431841
t->data = NULL;
1842+
t->element_factory = NULL;
18441843
t->stack = PyList_New(20);
18451844
if (!t->stack) {
18461845
Py_DECREF(t->this);
@@ -1859,11 +1858,38 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
18591858
static int
18601859
treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
18611860
{
1861+
static char *kwlist[] = {"element_factory", NULL};
1862+
PyObject *element_factory = NULL;
1863+
TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
1864+
1865+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
1866+
&element_factory)) {
1867+
return -1;
1868+
}
1869+
1870+
if (element_factory) {
1871+
Py_INCREF(element_factory);
1872+
Py_XDECREF(self_tb->element_factory);
1873+
self_tb->element_factory = element_factory;
1874+
}
1875+
18621876
return 0;
18631877
}
18641878

1865-
static void
1866-
treebuilder_dealloc(TreeBuilderObject *self)
1879+
static int
1880+
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
1881+
{
1882+
Py_VISIT(self->root);
1883+
Py_VISIT(self->this);
1884+
Py_VISIT(self->last);
1885+
Py_VISIT(self->data);
1886+
Py_VISIT(self->stack);
1887+
Py_VISIT(self->element_factory);
1888+
return 0;
1889+
}
1890+
1891+
static int
1892+
treebuilder_gc_clear(TreeBuilderObject *self)
18671893
{
18681894
Py_XDECREF(self->end_ns_event_obj);
18691895
Py_XDECREF(self->start_ns_event_obj);
@@ -1874,8 +1900,16 @@ treebuilder_dealloc(TreeBuilderObject *self)
18741900
Py_XDECREF(self->data);
18751901
Py_DECREF(self->last);
18761902
Py_DECREF(self->this);
1903+
Py_CLEAR(self->element_factory);
18771904
Py_XDECREF(self->root);
1905+
return 0;
1906+
}
18781907

1908+
static void
1909+
treebuilder_dealloc(TreeBuilderObject *self)
1910+
{
1911+
PyObject_GC_UnTrack(self);
1912+
treebuilder_gc_clear(self);
18791913
Py_TYPE(self)->tp_free((PyObject *)self);
18801914
}
18811915

@@ -1904,9 +1938,14 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
19041938
self->data = NULL;
19051939
}
19061940

1907-
node = create_new_element(tag, attrib);
1908-
if (!node)
1941+
if (self->element_factory) {
1942+
node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
1943+
} else {
1944+
node = create_new_element(tag, attrib);
1945+
}
1946+
if (!node) {
19091947
return NULL;
1948+
}
19101949

19111950
this = (PyObject*) self->this;
19121951

@@ -2180,10 +2219,11 @@ static PyTypeObject TreeBuilder_Type = {
21802219
0, /* tp_getattro */
21812220
0, /* tp_setattro */
21822221
0, /* tp_as_buffer */
2183-
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2222+
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2223+
/* tp_flags */
21842224
0, /* tp_doc */
2185-
0, /* tp_traverse */
2186-
0, /* tp_clear */
2225+
(traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2226+
(inquiry)treebuilder_gc_clear, /* tp_clear */
21872227
0, /* tp_richcompare */
21882228
0, /* tp_weaklistoffset */
21892229
0, /* tp_iter */
@@ -2443,17 +2483,20 @@ expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
24432483
attrib = Py_None;
24442484
}
24452485

2446-
if (TreeBuilder_CheckExact(self->target))
2486+
/* If we get None, pass an empty dictionary on */
2487+
if (attrib == Py_None) {
2488+
Py_DECREF(attrib);
2489+
attrib = PyDict_New();
2490+
if (!attrib)
2491+
return;
2492+
}
2493+
2494+
if (TreeBuilder_CheckExact(self->target)) {
24472495
/* shortcut */
24482496
res = treebuilder_handle_start((TreeBuilderObject*) self->target,
24492497
tag, attrib);
2498+
}
24502499
else if (self->handle_start) {
2451-
if (attrib == Py_None) {
2452-
Py_DECREF(attrib);
2453-
attrib = PyDict_New();
2454-
if (!attrib)
2455-
return;
2456-
}
24572500
res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
24582501
} else
24592502
res = NULL;

0 commit comments

Comments
 (0)