Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

gh-102120: Added an iter function that doesn't cache #102128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 23, 2023
Merged
7 changes: 6 additions & 1 deletion Doc/library/tarfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ be finalized; only the internally used file object will be closed. See the
.. versionadded:: 3.2
Added support for the context management protocol.

.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1)
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False)

All following arguments are optional and can be accessed as instance attributes
as well.
Expand Down Expand Up @@ -369,6 +369,9 @@ be finalized; only the internally used file object will be closed. See the
The *pax_headers* argument is an optional dictionary of strings which
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.

If *stream* is set to :const:`True`, then information about the files in the
archive is not cached while the archive is being read, saving memory.

.. versionchanged:: 3.2
Use ``'surrogateescape'`` as the default for the *errors* argument.

Expand All @@ -378,6 +381,8 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.6
The *name* parameter accepts a :term:`path-like object`.

.. versionchanged:: 3.13
Add the *stream* parameter.

.. classmethod:: TarFile.open(...)

Expand Down
17 changes: 11 additions & 6 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1633,7 +1633,7 @@ class TarFile(object):
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors="surrogateescape", pax_headers=None, debug=None,
errorlevel=None, copybufsize=None):
errorlevel=None, copybufsize=None, stream=False):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
Expand Down Expand Up @@ -1665,6 +1665,8 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None,
self.name = os.path.abspath(name) if name else None
self.fileobj = fileobj

self.stream = stream

# Init attributes.
if format is not None:
self.format = format
Expand Down Expand Up @@ -2631,7 +2633,9 @@ def next(self):
break

if tarinfo is not None:
self.members.append(tarinfo)
# if streaming the file we do not want to cache the tarinfo
if not self.stream:
self.members.append(tarinfo)
else:
self._loaded = True

Expand Down Expand Up @@ -2682,11 +2686,12 @@ def _getmember(self, name, tarinfo=None, normalize=False):

def _load(self):
"""Read through the entire archive file and look for readable
members.
members. This should not run if the file is set to stream.
"""
while self.next() is not None:
pass
self._loaded = True
if not self.stream:
while self.next() is not None:
pass
self._loaded = True

def _check(self, mode=None):
"""Check if TarFile is still open, and if the operation's mode
Expand Down
23 changes: 23 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ def setUp(self):
def tearDown(self):
self.tar.close()

class StreamModeTest(ReadTest):

# Only needs to change how the tarfile is opened to set
# stream mode
def setUp(self):
self.tar = tarfile.open(self.tarname, mode=self.mode,
encoding="iso8859-1",
stream=True)

class UstarReadTest(ReadTest, unittest.TestCase):

Expand Down Expand Up @@ -852,6 +860,21 @@ class Bz2StreamReadTest(Bz2Test, StreamReadTest):
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
pass

class TarStreamModeReadTest(StreamModeTest, unittest.TestCase):

def test_stream_mode_no_cache(self):
for _ in self.tar:
pass
self.assertEqual(self.tar.members, [])

class GzipStreamModeReadTest(GzipTest, TarStreamModeReadTest):
pass

class Bz2StreamModeReadTest(Bz2Test, TarStreamModeReadTest):
pass

class LzmaStreamModeReadTest(LzmaTest, TarStreamModeReadTest):
pass

class DetectReadTest(TarTest, unittest.TestCase):
def _testfunc_file(self, name, mode):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added a stream mode to ``tarfile`` that allows reading
archives without caching information about the files they contain.