Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 91d5193

Browse files
committed
Closes #2892: preserve iterparse events in case of SyntaxError.
1 parent c1e73c3 commit 91d5193

4 files changed

Lines changed: 49 additions & 30 deletions

File tree

Lib/test/test_xml_etree.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -754,6 +754,7 @@ def iterparse():
754754
... print(action, elem.tag)
755755
... except ET.ParseError as v:
756756
... print(v)
757+
end document
757758
junk after document element: line 1, column 12
758759
"""
759760

Lib/xml/etree/ElementTree.py

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1250,6 +1250,7 @@ def __init__(self, source, events, parser, close_source=False):
12501250
self._close_file = close_source
12511251
self._events = []
12521252
self._index = 0
1253+
self._error = None
12531254
self.root = self._root = None
12541255
self._parser = parser
12551256
# wire up the parser for event reporting
@@ -1291,24 +1292,31 @@ def __next__(self):
12911292
while 1:
12921293
try:
12931294
item = self._events[self._index]
1295+
self._index += 1
1296+
return item
12941297
except IndexError:
1295-
if self._parser is None:
1296-
self.root = self._root
1297-
if self._close_file:
1298-
self._file.close()
1299-
raise StopIteration
1300-
# load event buffer
1301-
del self._events[:]
1302-
self._index = 0
1303-
data = self._file.read(16384)
1304-
if data:
1298+
pass
1299+
if self._error:
1300+
e = self._error
1301+
self._error = None
1302+
raise e
1303+
if self._parser is None:
1304+
self.root = self._root
1305+
if self._close_file:
1306+
self._file.close()
1307+
raise StopIteration
1308+
# load event buffer
1309+
del self._events[:]
1310+
self._index = 0
1311+
data = self._file.read(16384)
1312+
if data:
1313+
try:
13051314
self._parser.feed(data)
1306-
else:
1307-
self._root = self._parser.close()
1308-
self._parser = None
1315+
except SyntaxError as exc:
1316+
self._error = exc
13091317
else:
1310-
self._index = self._index + 1
1311-
return item
1318+
self._root = self._parser.close()
1319+
self._parser = None
13121320

13131321
def __iter__(self):
13141322
return self

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,8 @@ Core and Builtins
6666
Library
6767
-------
6868

69+
- Issue #2892: preserve iterparse events in case of SyntaxError.
70+
6971
- Issue #670664: Fix HTMLParser to correctly handle the content of
7072
``<script>...</script>`` and ``<style>...</style>``.
7173

Modules/_elementtree.c

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3000,6 +3000,7 @@ PyInit__elementtree(void)
30003000
" self._file = file\n"
30013001
" self._events = []\n"
30023002
" self._index = 0\n"
3003+
" self._error = None\n"
30033004
" self.root = self._root = None\n"
30043005
" b = cElementTree.TreeBuilder()\n"
30053006
" self._parser = cElementTree.XMLParser(b)\n"
@@ -3008,24 +3009,31 @@ PyInit__elementtree(void)
30083009
" while 1:\n"
30093010
" try:\n"
30103011
" item = self._events[self._index]\n"
3012+
" self._index += 1\n"
3013+
" return item\n"
30113014
" except IndexError:\n"
3012-
" if self._parser is None:\n"
3013-
" self.root = self._root\n"
3014-
" if self._close_file:\n"
3015-
" self._file.close()\n"
3016-
" raise StopIteration\n"
3017-
" # load event buffer\n"
3018-
" del self._events[:]\n"
3019-
" self._index = 0\n"
3020-
" data = self._file.read(16384)\n"
3021-
" if data:\n"
3015+
" pass\n"
3016+
" if self._error:\n"
3017+
" e = self._error\n"
3018+
" self._error = None\n"
3019+
" raise e\n"
3020+
" if self._parser is None:\n"
3021+
" self.root = self._root\n"
3022+
" if self._close_file:\n"
3023+
" self._file.close()\n"
3024+
" raise StopIteration\n"
3025+
" # load event buffer\n"
3026+
" del self._events[:]\n"
3027+
" self._index = 0\n"
3028+
" data = self._file.read(16384)\n"
3029+
" if data:\n"
3030+
" try:\n"
30223031
" self._parser.feed(data)\n"
3023-
" else:\n"
3024-
" self._root = self._parser.close()\n"
3025-
" self._parser = None\n"
3032+
" except SyntaxError as exc:\n"
3033+
" self._error = exc\n"
30263034
" else:\n"
3027-
" self._index = self._index + 1\n"
3028-
" return item\n"
3035+
" self._root = self._parser.close()\n"
3036+
" self._parser = None\n"
30293037
" def __iter__(self):\n"
30303038
" return self\n"
30313039
"cElementTree.iterparse = iterparse\n"

0 commit comments

Comments
 (0)