From 1495ccde7b984ac9184061024f0b05112e42353d Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Sat, 5 Mar 2022 09:48:00 -0500 Subject: [PATCH 1/3] bpo-43292: Fix file leak in iterparse when not exhausted --- Lib/test/test_xml_etree.py | 5 +++++ Lib/xml/etree/ElementTree.py | 11 ++++++----- Misc/ACKS | 1 + .../Library/2022-03-05-09-43-53.bpo-25707.gTlclP.rst | 2 ++ 4 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-05-09-43-53.bpo-25707.gTlclP.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 35d901f9d08244..c82c65da85a90c 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -658,6 +658,11 @@ def test_iterparse(self): 'junk after document element: line 1, column 12') del cm, it + # Not exhausting the iterator still closes the resource (bpo-43292) + with warnings_helper.check_no_resource_warning(self): + it = iterparse(TESTFN) + del it + def test_writefile(self): elem = ET.Element("tag") elem.text = "text" diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 6059e2f592d2d0..d80eaffc437e5c 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1244,7 +1244,13 @@ def iterparse(source, events=None, parser=None): # Use the internal, undocumented _parser argument for now; When the # parser argument of iterparse is removed, this can be killed. pullparser = XMLPullParser(events=events, _parser=parser) + def iterator(): + nonlocal source + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True try: while True: yield from pullparser.read_events() @@ -1266,11 +1272,6 @@ class IterParseIterator(collections.abc.Iterator): it.root = None del iterator, IterParseIterator - close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - return it diff --git a/Misc/ACKS b/Misc/ACKS index da2c82610d5adf..df851bb834cd4e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1879,6 +1879,7 @@ Wojtek Walczak Charles Waldman Richard Walker Larry Wall +Jacob Walls Kevin Walzer Rodrigo Steinmuller Wanderley Dingyuan Wang diff --git a/Misc/NEWS.d/next/Library/2022-03-05-09-43-53.bpo-25707.gTlclP.rst b/Misc/NEWS.d/next/Library/2022-03-05-09-43-53.bpo-25707.gTlclP.rst new file mode 100644 index 00000000000000..a59f0a7657ff23 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-05-09-43-53.bpo-25707.gTlclP.rst @@ -0,0 +1,2 @@ +Fixed a file leak in :func:`xml.etree.ElementTree.iterparse` when the +iterator is not exhausted. Patch by Jacob Walls. From b29dba75e02c47a91dc94b4ce199070491c49db5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 6 Mar 2022 16:23:19 +0200 Subject: [PATCH 2/3] Update ElementTree.py --- Lib/xml/etree/ElementTree.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index d80eaffc437e5c..5249c7ab82b84b 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1245,13 +1245,13 @@ def iterparse(source, events=None, parser=None): # parser argument of iterparse is removed, this can be killed. pullparser = XMLPullParser(events=events, _parser=parser) - def iterator(): - nonlocal source + def iterator(source): close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True try: + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + yield None while True: yield from pullparser.read_events() # load event buffer @@ -1267,11 +1267,12 @@ def iterator(): source.close() class IterParseIterator(collections.abc.Iterator): - __next__ = iterator().__next__ + __next__ = iterator(source).__next__ it = IterParseIterator() it.root = None del iterator, IterParseIterator + next(it) return it From f532022dc684dcb768afc1cb3d6717c96aac1ed9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 6 Mar 2022 16:23:41 +0200 Subject: [PATCH 3/3] Update Lib/test/test_xml_etree.py --- Lib/test/test_xml_etree.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c82c65da85a90c..d2bdc4f7f04445 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -663,6 +663,9 @@ def test_iterparse(self): it = iterparse(TESTFN) del it + with self.assertRaises(FileNotFoundError): + iterparse("nonexistent") + def test_writefile(self): elem = ET.Element("tag") elem.text = "text"