@@ -45,6 +45,62 @@ def open(filename, mode="rb", compresslevel=9):
4545 """
4646 return GzipFile (filename , mode , compresslevel )
4747
class _PaddedFile:
    """Minimal read-only file object that prepends a string to the contents
    of an actual file. Shouldn't be used outside of gzip.py, as it lacks
    essential functionality.

    State:
      _buffer -- bytes served before any data from the wrapped file
      _length -- len(_buffer)
      _read   -- index of the next unread byte in _buffer, or None once
                 the buffer is exhausted/discarded and reads go straight
                 to the wrapped file
      file    -- the wrapped file object
    """

    def __init__(self, f, prepend=b''):
        self._buffer = prepend
        self._length = len(prepend)
        self.file = f
        self._read = 0

    def read(self, size):
        """Return up to *size* bytes: buffered bytes first, then file data.

        While the buffer is active the arithmetic below assumes a
        non-negative *size*; negative sizes are not handled specially.
        """
        if self._read is None:
            # Buffer already drained: plain pass-through.
            return self.file.read(size)
        if self._read + size <= self._length:
            # Request is satisfied entirely from the buffer.
            read = self._read
            self._read += size
            return self._buffer[read:self._read]
        else:
            # Drain what is left of the buffer and top up from the file.
            read = self._read
            self._read = None
            return self._buffer[read:] + \
                   self.file.read(size - self._length + read)

    def prepend(self, prepend=b'', readprevious=False):
        """Push *prepend* back so that it is returned by later reads.

        If *readprevious* is true and *prepend* is no longer than the
        number of buffer bytes already consumed, the read index is simply
        moved back by len(prepend); the caller is trusted that those bytes
        are the ones most recently read from the buffer.
        """
        if self._read is None:
            # No active buffer: the pushed-back data becomes the buffer.
            self._buffer = prepend
        elif readprevious and len(prepend) <= self._read:
            self._read -= len(prepend)
            return
        else:
            # Keep the still-unread part of the buffer and append the new
            # data.  (The slice start must be self._read; a bare ``read``
            # is not defined in this scope and raised NameError.)
            self._buffer = self._buffer[self._read:] + prepend
        self._length = len(self._buffer)
        self._read = 0

    def unused(self):
        """Return the buffered bytes that have not been read yet."""
        if self._read is None:
            return b''
        return self._buffer[self._read:]

    def seek(self, offset, whence=0):
        """Seek the wrapped file, discarding the buffer when leaving it.

        A relative seek (whence=1) that stays inside the buffer only moves
        the read index and returns None; every other seek drops the buffer
        and returns the result of the wrapped file's seek().
        """
        # This is only ever called with offset=whence=0
        if whence == 1 and self._read is not None:
            if 0 <= offset + self._read <= self._length:
                self._read += offset
                return
            else:
                # The wrapped file's position is already past the unread
                # buffered bytes, so they count towards the distance moved
                # and must be subtracted from the relative offset.
                offset -= self._length - self._read
        self._read = None
        self._buffer = None
        return self.file.seek(offset, whence)

    def __getattr__(self, name):
        """Delegate any attribute not defined here to the wrapped file."""
        if name == 'file':
            # 'file' itself is missing (e.g. instance created without
            # __init__); fail cleanly instead of recursing forever.
            raise AttributeError(name)
        return getattr(self.file, name)
103+
48104class GzipFile (io .BufferedIOBase ):
49105 """The GzipFile class simulates most of the methods of a file object with
50106 the exception of the readinto() and truncate() methods.
@@ -119,6 +175,7 @@ def __init__(self, filename=None, mode=None,
119175 self .name = filename
120176 # Starts small, scales exponentially
121177 self .min_readsize = 100
178+ fileobj = _PaddedFile (fileobj )
122179
123180 elif mode [0 :1 ] == 'w' or mode [0 :1 ] == 'a' :
124181 self .mode = WRITE
@@ -188,6 +245,9 @@ def _init_read(self):
188245
189246 def _read_gzip_header (self ):
190247 magic = self .fileobj .read (2 )
248+ if magic == b'' :
249+ raise EOFError ("Reached EOF" )
250+
191251 if magic != b'\037 \213 ' :
192252 raise IOError ('Not a gzipped file' )
193253 method = ord ( self .fileobj .read (1 ) )
@@ -219,6 +279,11 @@ def _read_gzip_header(self):
219279 if flag & FHCRC :
220280 self .fileobj .read (2 ) # Read & discard the 16-bit header CRC
221281
282+ unused = self .fileobj .unused ()
283+ if unused :
284+ uncompress = self .decompress .decompress (unused )
285+ self ._add_read_data (uncompress )
286+
222287 def write (self ,data ):
223288 if self .mode != WRITE :
224289 import errno
@@ -282,16 +347,6 @@ def _read(self, size=1024):
282347 if self ._new_member :
283348 # If the _new_member flag is set, we have to
284349 # jump to the next member, if there is one.
285- #
286- # First, check if we're at the end of the file;
287- # if so, it's time to stop; no more members to read.
288- pos = self .fileobj .tell () # Save current position
289- self .fileobj .seek (0 , 2 ) # Seek to end of file
290- if pos == self .fileobj .tell ():
291- raise EOFError ("Reached EOF" )
292- else :
293- self .fileobj .seek ( pos ) # Return to original position
294-
295350 self ._init_read ()
296351 self ._read_gzip_header ()
297352 self .decompress = zlib .decompressobj (- zlib .MAX_WBITS )
@@ -305,6 +360,9 @@ def _read(self, size=1024):
305360
306361 if buf == b"" :
307362 uncompress = self .decompress .flush ()
363+ # Prepend the already read bytes to the fileobj so they can be
364+ # seen by _read_eof()
365+ self .fileobj .prepend (self .decompress .unused_data , True )
308366 self ._read_eof ()
309367 self ._add_read_data ( uncompress )
310368 raise EOFError ('Reached EOF' )
@@ -316,10 +374,9 @@ def _read(self, size=1024):
316374 # Ending case: we've come to the end of a member in the file,
317375 # so seek back to the start of the unused data, finish up
318376 # this member, and read a new gzip header.
319- # (The number of bytes to seek back is the length of the unused
320- # data, minus 8 because _read_eof() will rewind a further 8 bytes)
321- self .fileobj .seek ( - len (self .decompress .unused_data )+ 8 , 1 )
322-
377+ # Prepend the already read bytes to the fileobj so they can be
378+ # seen by _read_eof() and _read_gzip_header()
379+ self .fileobj .prepend (self .decompress .unused_data , True )
323380 # Check the CRC and file size, and set the flag so we read
324381 # a new member on the next call
325382 self ._read_eof ()
@@ -334,12 +391,10 @@ def _add_read_data(self, data):
334391 self .size = self .size + len (data )
335392
336393 def _read_eof (self ):
337- # We've read to the end of the file, so we have to rewind in order
338- # to reread the 8 bytes containing the CRC and the file size.
394+ # We've read to the end of the file
339395 # We check that the computed CRC and size of the
340396 # uncompressed data matches the stored values. Note that the size
341397 # stored is the true file size mod 2**32.
342- self .fileobj .seek (- 8 , 1 )
343398 crc32 = read32 (self .fileobj )
344399 isize = read32 (self .fileobj ) # may exceed 2GB
345400 if crc32 != self .crc :
@@ -355,7 +410,7 @@ def _read_eof(self):
355410 while c == b"\x00 " :
356411 c = self .fileobj .read (1 )
357412 if c :
358- self .fileobj .seek ( - 1 , 1 )
413+ self .fileobj .prepend ( c , True )
359414
360415 @property
361416 def closed (self ):
0 commit comments