Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

bpo-36676: Namespace prefix aware parsing support for the ET.XMLParser target #12885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions Doc/library/xml.etree.elementtree.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,7 @@ TreeBuilder Objects


In addition, a custom :class:`TreeBuilder` object can provide the
following method:
following methods:

.. method:: doctype(name, pubid, system)

Expand All @@ -1096,6 +1096,23 @@ TreeBuilder Objects

.. versionadded:: 3.2

.. method:: start_ns(prefix, uri)

Is called whenever the parser encounters a new namespace declaration,
before the ``start()`` callback for the opening element that defines it.
*prefix* is ``''`` for the default namespace and the declared
namespace prefix name otherwise. *uri* is the namespace URI.

.. versionadded:: 3.8

.. method:: end_ns(prefix)

Is called after the ``end()`` callback of an element that declared
a namespace prefix mapping, with the name of the *prefix* that went
out of scope.

.. versionadded:: 3.8


.. _elementtree-xmlparser-objects:

Expand Down Expand Up @@ -1131,7 +1148,8 @@ XMLParser Objects

:meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method
for each opening tag, its ``end(tag)`` method for each closing tag, and data
is processed by method ``data(data)``. :meth:`XMLParser.close` calls
is processed by method ``data(data)``. For further supported callback
methods, see the :class:`TreeBuilder` class. :meth:`XMLParser.close` calls
*target*\'s method ``close()``. :class:`XMLParser` can be used for more than
building a tree structure. This example counts the maximum depth
of an XML file::
Expand Down
93 changes: 88 additions & 5 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
import operator
import pickle
import sys
import textwrap
import types
import unittest
import warnings
import weakref

from itertools import product
from itertools import product, islice
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr

Expand Down Expand Up @@ -694,12 +695,17 @@ def pi(self, target, data):
self.append(("pi", target, data))
def comment(self, data):
self.append(("comment", data))
def start_ns(self, prefix, uri):
self.append(("start-ns", prefix, uri))
def end_ns(self, prefix):
self.append(("end-ns", prefix))
builder = Builder()
parser = ET.XMLParser(target=builder)
parser.feed(data)
self.assertEqual(builder, [
('pi', 'pi', 'data'),
('comment', ' comment '),
('start-ns', '', 'namespace'),
('start', '{namespace}root'),
('start', '{namespace}element'),
('end', '{namespace}element'),
Expand All @@ -708,8 +714,30 @@ def comment(self, data):
('start', '{namespace}empty-element'),
('end', '{namespace}empty-element'),
('end', '{namespace}root'),
('end-ns', ''),
])

def test_custom_builder_only_end_ns(self):
class Builder(list):
def end_ns(self, prefix):
self.append(("end-ns", prefix))

builder = Builder()
parser = ET.XMLParser(target=builder)
parser.feed(textwrap.dedent("""\
<?pi data?>
<!-- comment -->
<root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
<a:element key='value'>text</a:element>
<p:element>text</p:element>tail
<empty-element/>
</root>
"""))
self.assertEqual(builder, [
('end-ns', 'a'),
('end-ns', 'p'),
('end-ns', ''),
])

# Element.getchildren() and ElementTree.getiterator() are deprecated.
@checkwarnings(("This method will be removed in future versions. "
Expand Down Expand Up @@ -1194,14 +1222,19 @@ def _feed(self, parser, data, chunk_size=None):
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])

def assert_events(self, parser, expected):
def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
[(event, (elem.tag, elem.text))
for event, elem in parser.read_events()],
for event, elem in islice(parser.read_events(), max_events)],
expected)

def assert_event_tags(self, parser, expected):
events = parser.read_events()
def assert_event_tuples(self, parser, expected, max_events=None):
self.assertEqual(
list(islice(parser.read_events(), max_events)),
expected)

def assert_event_tags(self, parser, expected, max_events=None):
events = islice(parser.read_events(), max_events)
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)

Expand Down Expand Up @@ -1276,6 +1309,56 @@ def test_ns_events(self):
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
self.assertIsNone(parser.close())

def test_ns_events_start(self):
parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
self.assert_event_tuples(parser, [
('start-ns', ('', 'abc')),
('start-ns', ('p', 'xyz')),
], max_events=2)
self.assert_event_tags(parser, [
('start', '{abc}tag'),
], max_events=1)

self._feed(parser, "<child />\n")
self.assert_event_tags(parser, [
('start', '{abc}child'),
('end', '{abc}child'),
])

self._feed(parser, "</tag>\n")
parser.close()
self.assert_event_tags(parser, [
('end', '{abc}tag'),
])

def test_ns_events_start_end(self):
parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
self.assert_event_tuples(parser, [
('start-ns', ('', 'abc')),
('start-ns', ('p', 'xyz')),
], max_events=2)
self.assert_event_tags(parser, [
('start', '{abc}tag'),
], max_events=1)

self._feed(parser, "<child />\n")
self.assert_event_tags(parser, [
('start', '{abc}child'),
('end', '{abc}child'),
])

self._feed(parser, "</tag>\n")
parser.close()
self.assert_event_tags(parser, [
('end', '{abc}tag'),
], max_events=1)
self.assert_event_tuples(parser, [
('end-ns', None),
('end-ns', None),
])

def test_events(self):
parser = ET.XMLPullParser(events=())
self._feed(parser, "<root/>\n")
Expand Down
30 changes: 26 additions & 4 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,10 @@ def __init__(self, *, target=None, encoding=None):
parser.StartElementHandler = self._start
if hasattr(target, 'end'):
parser.EndElementHandler = self._end
if hasattr(target, 'start_ns'):
parser.StartNamespaceDeclHandler = self._start_ns
if hasattr(target, 'end_ns'):
parser.EndNamespaceDeclHandler = self._end_ns
if hasattr(target, 'data'):
parser.CharacterDataHandler = target.data
# miscellaneous callbacks
Expand Down Expand Up @@ -1559,12 +1563,24 @@ def handler(tag, event=event_name, append=append,
append((event, end(tag)))
parser.EndElementHandler = handler
elif event_name == "start-ns":
def handler(prefix, uri, event=event_name, append=append):
append((event, (prefix or "", uri or "")))
# TreeBuilder does not implement .start_ns()
if hasattr(self.target, "start_ns"):
def handler(prefix, uri, event=event_name, append=append,
start_ns=self._start_ns):
append((event, start_ns(prefix, uri)))
else:
def handler(prefix, uri, event=event_name, append=append):
append((event, (prefix or '', uri or '')))
parser.StartNamespaceDeclHandler = handler
elif event_name == "end-ns":
def handler(prefix, event=event_name, append=append):
append((event, None))
# TreeBuilder does not implement .end_ns()
if hasattr(self.target, "end_ns"):
def handler(prefix, event=event_name, append=append,
end_ns=self._end_ns):
append((event, end_ns(prefix)))
else:
def handler(prefix, event=event_name, append=append):
append((event, None))
parser.EndNamespaceDeclHandler = handler
elif event_name == 'comment':
def handler(text, event=event_name, append=append, self=self):
Expand Down Expand Up @@ -1595,6 +1611,12 @@ def _fixname(self, key):
self._names[key] = name
return name

def _start_ns(self, prefix, uri):
return self.target.start_ns(prefix or '', uri or '')

def _end_ns(self, prefix):
return self.target.end_ns(prefix or '')

def _start(self, tag, attr_list):
# Handler for expat's StartElementHandler. Since ordered_attributes
# is set, the attributes are reported as a list of alternating
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The XMLParser() in xml.etree.ElementTree provides namespace prefix context to the
parser target if the target defines the callback methods "start_ns()" and/or "end_ns()".
Patch by Stefan Behnel.
Loading