Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7a919e9

Browse files
committed
Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.
The ExFileObject class was removed, some of its code went into _FileInFile.
1 parent ef5a463 commit 7a919e9

4 files changed

Lines changed: 80 additions & 200 deletions

File tree

Doc/library/tarfile.rst

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -376,15 +376,12 @@ be finalized; only the internally used file object will be closed. See the
376376
.. method:: TarFile.extractfile(member)
377377

378378
Extract a member from the archive as a file object. *member* may be a filename
379-
or a :class:`TarInfo` object. If *member* is a regular file, a :term:`file-like
380-
object` is returned. If *member* is a link, a file-like object is constructed from
381-
the link's target. If *member* is none of the above, :const:`None` is returned.
379+
or a :class:`TarInfo` object. If *member* is a regular file or a link, an
380+
:class:`io.BufferedReader` object is returned. Otherwise, :const:`None` is
381+
returned.
382382

383-
.. note::
384-
385-
The file-like object is read-only. It provides the methods
386-
:meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`seek`, :meth:`tell`,
387-
and :meth:`close`, and also supports iteration over its lines.
383+
.. versionchanged:: 3.3
384+
Return an :class:`io.BufferedReader` object.
388385

389386

390387
.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None)

Lib/tarfile.py

Lines changed: 42 additions & 154 deletions
Original file line number | Diff line number | Diff line change
@@ -668,6 +668,8 @@ def __init__(self, fileobj, offset, size, blockinfo=None):
668668
self.offset = offset
669669
self.size = size
670670
self.position = 0
671+
self.name = getattr(fileobj, "name", None)
672+
self.closed = False
671673

672674
if blockinfo is None:
673675
blockinfo = [(0, size)]
@@ -686,21 +688,38 @@ def __init__(self, fileobj, offset, size, blockinfo=None):
686688
if lastpos < self.size:
687689
self.map.append((False, lastpos, self.size, None))
688690

691+
def flush(self):
692+
pass
693+
694+
def readable(self):
695+
return True
696+
697+
def writable(self):
698+
return False
699+
689700
def seekable(self):
690-
if not hasattr(self.fileobj, "seekable"):
691-
# XXX gzip.GzipFile and bz2.BZ2File
692-
return True
693701
return self.fileobj.seekable()
694702

695703
def tell(self):
696704
"""Return the current file position.
697705
"""
698706
return self.position
699707

700-
def seek(self, position):
708+
def seek(self, position, whence=io.SEEK_SET):
701709
"""Seek to a position in the file.
702710
"""
703-
self.position = position
711+
if whence == io.SEEK_SET:
712+
self.position = min(max(position, 0), self.size)
713+
elif whence == io.SEEK_CUR:
714+
if position < 0:
715+
self.position = max(self.position + position, 0)
716+
else:
717+
self.position = min(self.position + position, self.size)
718+
elif whence == io.SEEK_END:
719+
self.position = max(min(self.size + position, self.size), 0)
720+
else:
721+
raise ValueError("Invalid argument")
722+
return self.position
704723

705724
def read(self, size=None):
706725
"""Read data from the file.
@@ -729,146 +748,16 @@ def read(self, size=None):
729748
size -= length
730749
self.position += length
731750
return buf
732-
#class _FileInFile
733-
734-
735-
class ExFileObject(object):
736-
"""File-like object for reading an archive member.
737-
Is returned by TarFile.extractfile().
738-
"""
739-
blocksize = 1024
740-
741-
def __init__(self, tarfile, tarinfo):
742-
self.fileobj = _FileInFile(tarfile.fileobj,
743-
tarinfo.offset_data,
744-
tarinfo.size,
745-
tarinfo.sparse)
746-
self.name = tarinfo.name
747-
self.mode = "r"
748-
self.closed = False
749-
self.size = tarinfo.size
750-
751-
self.position = 0
752-
self.buffer = b""
753-
754-
def readable(self):
755-
return True
756-
757-
def writable(self):
758-
return False
759-
760-
def seekable(self):
761-
return self.fileobj.seekable()
762-
763-
def read(self, size=None):
764-
"""Read at most size bytes from the file. If size is not
765-
present or None, read all data until EOF is reached.
766-
"""
767-
if self.closed:
768-
raise ValueError("I/O operation on closed file")
769-
770-
buf = b""
771-
if self.buffer:
772-
if size is None:
773-
buf = self.buffer
774-
self.buffer = b""
775-
else:
776-
buf = self.buffer[:size]
777-
self.buffer = self.buffer[size:]
778-
779-
if size is None:
780-
buf += self.fileobj.read()
781-
else:
782-
buf += self.fileobj.read(size - len(buf))
783-
784-
self.position += len(buf)
785-
return buf
786-
787-
# XXX TextIOWrapper uses the read1() method.
788-
read1 = read
789-
790-
def readline(self, size=-1):
791-
"""Read one entire line from the file. If size is present
792-
and non-negative, return a string with at most that
793-
size, which may be an incomplete line.
794-
"""
795-
if self.closed:
796-
raise ValueError("I/O operation on closed file")
797-
798-
pos = self.buffer.find(b"\n") + 1
799-
if pos == 0:
800-
# no newline found.
801-
while True:
802-
buf = self.fileobj.read(self.blocksize)
803-
self.buffer += buf
804-
if not buf or b"\n" in buf:
805-
pos = self.buffer.find(b"\n") + 1
806-
if pos == 0:
807-
# no newline found.
808-
pos = len(self.buffer)
809-
break
810-
811-
if size != -1:
812-
pos = min(size, pos)
813-
814-
buf = self.buffer[:pos]
815-
self.buffer = self.buffer[pos:]
816-
self.position += len(buf)
817-
return buf
818-
819-
def readlines(self):
820-
"""Return a list with all remaining lines.
821-
"""
822-
result = []
823-
while True:
824-
line = self.readline()
825-
if not line: break
826-
result.append(line)
827-
return result
828-
829-
def tell(self):
830-
"""Return the current file position.
831-
"""
832-
if self.closed:
833-
raise ValueError("I/O operation on closed file")
834-
835-
return self.position
836751

837-
def seek(self, pos, whence=io.SEEK_SET):
838-
"""Seek to a position in the file.
839-
"""
840-
if self.closed:
841-
raise ValueError("I/O operation on closed file")
842-
843-
if whence == io.SEEK_SET:
844-
self.position = min(max(pos, 0), self.size)
845-
elif whence == io.SEEK_CUR:
846-
if pos < 0:
847-
self.position = max(self.position + pos, 0)
848-
else:
849-
self.position = min(self.position + pos, self.size)
850-
elif whence == io.SEEK_END:
851-
self.position = max(min(self.size + pos, self.size), 0)
852-
else:
853-
raise ValueError("Invalid argument")
854-
855-
self.buffer = b""
856-
self.fileobj.seek(self.position)
752+
def readinto(self, b):
753+
buf = self.read(len(b))
754+
b[:len(buf)] = buf
755+
return len(buf)
857756

858757
def close(self):
859-
"""Close the file object.
860-
"""
861758
self.closed = True
759+
#class _FileInFile
862760

863-
def __iter__(self):
864-
"""Get an iterator over the file's lines.
865-
"""
866-
while True:
867-
line = self.readline()
868-
if not line:
869-
break
870-
yield line
871-
#class ExFileObject
872761

873762
#------------------
874763
# Exported Classes
@@ -1554,7 +1443,8 @@ class TarFile(object):
15541443

15551444
tarinfo = TarInfo # The default TarInfo class to use.
15561445

1557-
fileobject = ExFileObject # The default ExFileObject class to use.
1446+
fileobject = None # The file-object for extractfile() or
1447+
# io.BufferedReader if None.
15581448

15591449
def __init__(self, name=None, mode="r", fileobj=None, format=None,
15601450
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
@@ -2178,12 +2068,9 @@ def extract(self, member, path="", set_attrs=True):
21782068

21792069
def extractfile(self, member):
21802070
"""Extract a member from the archive as a file object. `member' may be
2181-
a filename or a TarInfo object. If `member' is a regular file, a
2182-
file-like object is returned. If `member' is a link, a file-like
2183-
object is constructed from the link's target. If `member' is none of
2184-
the above, None is returned.
2185-
The file-like object is read-only and provides the following
2186-
methods: read(), readline(), readlines(), seek() and tell()
2071+
a filename or a TarInfo object. If `member' is a regular file or a
2072+
link, an io.BufferedReader object is returned. Otherwise, None is
2073+
returned.
21872074
"""
21882075
self._check("r")
21892076

@@ -2192,13 +2079,14 @@ def extractfile(self, member):
21922079
else:
21932080
tarinfo = member
21942081

2195-
if tarinfo.isreg():
2196-
return self.fileobject(self, tarinfo)
2197-
2198-
elif tarinfo.type not in SUPPORTED_TYPES:
2199-
# If a member's type is unknown, it is treated as a
2200-
# regular file.
2201-
return self.fileobject(self, tarinfo)
2082+
if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2083+
# Members with unknown types are treated as regular files.
2084+
if self.fileobject is None:
2085+
fileobj = _FileInFile(self.fileobj, tarinfo.offset_data, tarinfo.size, tarinfo.sparse)
2086+
return io.BufferedReader(fileobj)
2087+
else:
2088+
# Keep the traditional pre-3.3 API intact.
2089+
return self.fileobject(self, tarinfo)
22022090

22032091
elif tarinfo.islnk() or tarinfo.issym():
22042092
if isinstance(self.fileobj, _Stream):

0 commit comments

Comments
 (0)