@@ -668,6 +668,8 @@ def __init__(self, fileobj, offset, size, blockinfo=None):
668668 self .offset = offset
669669 self .size = size
670670 self .position = 0
671+ self .name = getattr (fileobj , "name" , None )
672+ self .closed = False
671673
672674 if blockinfo is None :
673675 blockinfo = [(0 , size )]
@@ -686,21 +688,38 @@ def __init__(self, fileobj, offset, size, blockinfo=None):
686688 if lastpos < self .size :
687689 self .map .append ((False , lastpos , self .size , None ))
688690
def flush(self):
    """Do nothing and return None; this method performs no work."""
    return None
693+
def readable(self):
    """Report that reading is supported; the answer is always True."""
    return True
696+
def writable(self):
    """Report that writing is not supported; the answer is always False."""
    return False
699+
def seekable(self):
    """Return whether the wrapped file object supports seeking.

    The answer is delegated unchanged to ``self.fileobj.seekable()``.
    """
    underlying = self.fileobj
    return underlying.seekable()
694702
def tell(self):
    """Return the current file position (the value of ``self.position``)."""
    current = self.position
    return current
699707
def seek(self, position, whence=io.SEEK_SET):
    """Seek to a position in the file and return the new position.

    `position' is interpreted according to `whence':

      io.SEEK_SET -- offset from the start of the file (the default)
      io.SEEK_CUR -- offset relative to the current position
      io.SEEK_END -- offset relative to the end of the file

    The resulting position is kept between 0 and self.size.  Any other
    value of `whence' raises ValueError.
    """
    if whence == io.SEEK_SET:
        target = min(max(position, 0), self.size)
    elif whence == io.SEEK_CUR:
        moved = self.position + position
        # A backward move can only underflow, a forward move can only
        # overflow, so each direction is clamped on one side only.
        target = max(moved, 0) if position < 0 else min(moved, self.size)
    elif whence == io.SEEK_END:
        target = max(min(self.size + position, self.size), 0)
    else:
        raise ValueError("Invalid argument")
    self.position = target
    return target
704723
705724 def read (self , size = None ):
706725 """Read data from the file.
@@ -729,146 +748,16 @@ def read(self, size=None):
729748 size -= length
730749 self .position += length
731750 return buf
732- #class _FileInFile
733-
734-
735- class ExFileObject (object ):
736- """File-like object for reading an archive member.
737- Is returned by TarFile.extractfile().
738- """
739- blocksize = 1024
740-
741- def __init__ (self , tarfile , tarinfo ):
742- self .fileobj = _FileInFile (tarfile .fileobj ,
743- tarinfo .offset_data ,
744- tarinfo .size ,
745- tarinfo .sparse )
746- self .name = tarinfo .name
747- self .mode = "r"
748- self .closed = False
749- self .size = tarinfo .size
750-
751- self .position = 0
752- self .buffer = b""
753-
754- def readable (self ):
755- return True
756-
757- def writable (self ):
758- return False
759-
760- def seekable (self ):
761- return self .fileobj .seekable ()
762-
763- def read (self , size = None ):
764- """Read at most size bytes from the file. If size is not
765- present or None, read all data until EOF is reached.
766- """
767- if self .closed :
768- raise ValueError ("I/O operation on closed file" )
769-
770- buf = b""
771- if self .buffer :
772- if size is None :
773- buf = self .buffer
774- self .buffer = b""
775- else :
776- buf = self .buffer [:size ]
777- self .buffer = self .buffer [size :]
778-
779- if size is None :
780- buf += self .fileobj .read ()
781- else :
782- buf += self .fileobj .read (size - len (buf ))
783-
784- self .position += len (buf )
785- return buf
786-
787- # XXX TextIOWrapper uses the read1() method.
788- read1 = read
789-
790- def readline (self , size = - 1 ):
791- """Read one entire line from the file. If size is present
792- and non-negative, return a string with at most that
793- size, which may be an incomplete line.
794- """
795- if self .closed :
796- raise ValueError ("I/O operation on closed file" )
797-
798- pos = self .buffer .find (b"\n " ) + 1
799- if pos == 0 :
800- # no newline found.
801- while True :
802- buf = self .fileobj .read (self .blocksize )
803- self .buffer += buf
804- if not buf or b"\n " in buf :
805- pos = self .buffer .find (b"\n " ) + 1
806- if pos == 0 :
807- # no newline found.
808- pos = len (self .buffer )
809- break
810-
811- if size != - 1 :
812- pos = min (size , pos )
813-
814- buf = self .buffer [:pos ]
815- self .buffer = self .buffer [pos :]
816- self .position += len (buf )
817- return buf
818-
819- def readlines (self ):
820- """Return a list with all remaining lines.
821- """
822- result = []
823- while True :
824- line = self .readline ()
825- if not line : break
826- result .append (line )
827- return result
828-
829- def tell (self ):
830- """Return the current file position.
831- """
832- if self .closed :
833- raise ValueError ("I/O operation on closed file" )
834-
835- return self .position
836751
837- def seek (self , pos , whence = io .SEEK_SET ):
838- """Seek to a position in the file.
839- """
840- if self .closed :
841- raise ValueError ("I/O operation on closed file" )
842-
843- if whence == io .SEEK_SET :
844- self .position = min (max (pos , 0 ), self .size )
845- elif whence == io .SEEK_CUR :
846- if pos < 0 :
847- self .position = max (self .position + pos , 0 )
848- else :
849- self .position = min (self .position + pos , self .size )
850- elif whence == io .SEEK_END :
851- self .position = max (min (self .size + pos , self .size ), 0 )
852- else :
853- raise ValueError ("Invalid argument" )
854-
855- self .buffer = b""
856- self .fileobj .seek (self .position )
def readinto(self, b):
    """Read bytes into the pre-allocated, writable buffer `b'.

    Up to as many bytes as `b' can hold are requested from self.read()
    and copied to the start of `b'.  Returns the number of bytes stored,
    which is smaller than the buffer size when read() returns less data
    (0 when it returns nothing).
    """
    # Operate on a flat unsigned-byte view of the buffer.  len(b) counts
    # *items*, not bytes, for buffers such as array.array('H'), and
    # assigning a bytes object to a slice of such a buffer fails; the
    # byte view gives the right size and accepts the data for any
    # contiguous writable buffer.
    with memoryview(b) as view, view.cast("B") as byte_view:
        data = self.read(len(byte_view))
        byte_view[:len(data)] = data
    return len(data)
857756
def close(self):
    """Mark this object as closed by setting ``self.closed`` to True."""
    self.closed = True
759+ #class _FileInFile
862760
863- def __iter__ (self ):
864- """Get an iterator over the file's lines.
865- """
866- while True :
867- line = self .readline ()
868- if not line :
869- break
870- yield line
871- #class ExFileObject
872761
873762#------------------
874763# Exported Classes
@@ -1554,7 +1443,8 @@ class TarFile(object):
15541443
15551444 tarinfo = TarInfo # The default TarInfo class to use.
15561445
1557- fileobject = ExFileObject # The default ExFileObject class to use.
1446+ fileobject = None # The file-object for extractfile() or
1447+ # io.BufferedReader if None.
15581448
15591449 def __init__ (self , name = None , mode = "r" , fileobj = None , format = None ,
15601450 tarinfo = None , dereference = None , ignore_zeros = None , encoding = None ,
@@ -2178,12 +2068,9 @@ def extract(self, member, path="", set_attrs=True):
21782068
21792069 def extractfile (self , member ):
21802070 """Extract a member from the archive as a file object. `member' may be
2181- a filename or a TarInfo object. If `member' is a regular file, a
2182- file-like object is returned. If `member' is a link, a file-like
2183- object is constructed from the link's target. If `member' is none of
2184- the above, None is returned.
2185- The file-like object is read-only and provides the following
2186- methods: read(), readline(), readlines(), seek() and tell()
2071+ a filename or a TarInfo object. If `member' is a regular file or a
2072+ link, an io.BufferedReader object is returned. Otherwise, None is
2073+ returned.
21872074 """
21882075 self ._check ("r" )
21892076
@@ -2192,13 +2079,14 @@ def extractfile(self, member):
21922079 else :
21932080 tarinfo = member
21942081
2195- if tarinfo .isreg ():
2196- return self .fileobject (self , tarinfo )
2197-
2198- elif tarinfo .type not in SUPPORTED_TYPES :
2199- # If a member's type is unknown, it is treated as a
2200- # regular file.
2201- return self .fileobject (self , tarinfo )
2082+ if tarinfo .isreg () or tarinfo .type not in SUPPORTED_TYPES :
2083+ # Members with unknown types are treated as regular files.
2084+ if self .fileobject is None :
2085+ fileobj = _FileInFile (self .fileobj , tarinfo .offset_data , tarinfo .size , tarinfo .sparse )
2086+ return io .BufferedReader (fileobj )
2087+ else :
2088+ # Keep the traditional pre-3.3 API intact.
2089+ return self .fileobject (self , tarinfo )
22022090
22032091 elif tarinfo .islnk () or tarinfo .issym ():
22042092 if isinstance (self .fileobj , _Stream ):
0 commit comments