@@ -33,9 +33,6 @@ def write32u(output, value):
3333 # or unsigned.
3434 output .write (struct .pack ("<L" , value ))
3535
36- def read32 (input ):
37- return struct .unpack ("<I" , input .read (4 ))[0 ]
38-
3936def open (filename , mode = "rb" , compresslevel = 9 ):
4037 """Shorthand for GzipFile(filename, mode, compresslevel).
4138
@@ -259,27 +256,31 @@ def _init_read(self):
259256 self .crc = zlib .crc32 (b"" ) & 0xffffffff
260257 self .size = 0
261258
259+ def _read_exact (self , n ):  # Return exactly n bytes from self.fileobj; raises EOFError if the stream runs out first.
260+ data = self .fileobj .read (n )
261+ while len (data ) < n :  # a single read() may come back short, so keep asking for the remainder
262+ b = self .fileobj .read (n - len (data ))
263+ if not b :  # empty read: no more input, so the request cannot be satisfied
264+ raise EOFError ("Compressed file ended before the "
265+ "end-of-stream marker was reached" )
266+ data += b
267+ return data
268+
262269 def _read_gzip_header (self ):
263270 magic = self .fileobj .read (2 )
264271 if magic == b'' :
265- raise EOFError ( "Reached EOF" )
272+ return False
266273
267274 if magic != b'\037 \213 ' :
268275 raise IOError ('Not a gzipped file' )
269- method = ord ( self .fileobj .read (1 ) )
276+
277+ method , flag , self .mtime = struct .unpack ("<BBIxx" , self ._read_exact (8 ))
270278 if method != 8 :
271279 raise IOError ('Unknown compression method' )
272- flag = ord ( self .fileobj .read (1 ) )
273- self .mtime = read32 (self .fileobj )
274- # extraflag = self.fileobj.read(1)
275- # os = self.fileobj.read(1)
276- self .fileobj .read (2 )
277280
278281 if flag & FEXTRA :
279282 # Read & discard the extra field, if present
280- xlen = ord (self .fileobj .read (1 ))
281- xlen = xlen + 256 * ord (self .fileobj .read (1 ))
282- self .fileobj .read (xlen )
283+ self ._read_exact (struct .unpack ("<H" , self ._read_exact (2 )))
283284 if flag & FNAME :
284285 # Read and discard a null-terminated string containing the filename
285286 while True :
@@ -293,12 +294,13 @@ def _read_gzip_header(self):
293294 if not s or s == b'\000 ' :
294295 break
295296 if flag & FHCRC :
296- self .fileobj . read (2 ) # Read & discard the 16-bit header CRC
297+ self ._read_exact (2 ) # Read & discard the 16-bit header CRC
297298
298299 unused = self .fileobj .unused ()
299300 if unused :
300301 uncompress = self .decompress .decompress (unused )
301302 self ._add_read_data (uncompress )
303+ return True
302304
303305 def write (self ,data ):
304306 self ._check_closed ()
@@ -332,20 +334,16 @@ def read(self, size=-1):
332334
333335 readsize = 1024
334336 if size < 0 : # get the whole thing
335- try :
336- while True :
337- self ._read (readsize )
338- readsize = min (self .max_read_chunk , readsize * 2 )
339- except EOFError :
340- size = self .extrasize
337+ while self ._read (readsize ):
338+ readsize = min (self .max_read_chunk , readsize * 2 )
339+ size = self .extrasize
341340 else : # just get some more of it
342- try :
343- while size > self .extrasize :
344- self ._read (readsize )
345- readsize = min (self .max_read_chunk , readsize * 2 )
346- except EOFError :
347- if size > self .extrasize :
348- size = self .extrasize
341+ while size > self .extrasize :
342+ if not self ._read (readsize ):
343+ if size > self .extrasize :
344+ size = self .extrasize
345+ break
346+ readsize = min (self .max_read_chunk , readsize * 2 )
349347
350348 offset = self .offset - self .extrastart
351349 chunk = self .extrabuf [offset : offset + size ]
@@ -366,12 +364,9 @@ def peek(self, n):
366364 if self .extrasize == 0 :
367365 if self .fileobj is None :
368366 return b''
369- try :
370- # Ensure that we don't return b"" if we haven't reached EOF.
371- while self .extrasize == 0 :
372- # 1024 is the same buffering heuristic used in read()
373- self ._read (max (n , 1024 ))
374- except EOFError :
367+ # Ensure that we don't return b"" if we haven't reached EOF.
368+ # 1024 is the same buffering heuristic used in read()
369+ while self .extrasize == 0 and self ._read (max (n , 1024 )):
375370 pass
376371 offset = self .offset - self .extrastart
377372 remaining = self .extrasize
@@ -384,13 +379,14 @@ def _unread(self, buf):
384379
385380 def _read (self , size = 1024 ):
386381 if self .fileobj is None :
387- raise EOFError ( "Reached EOF" )
382+ return False
388383
389384 if self ._new_member :
390385 # If the _new_member flag is set, we have to
391386 # jump to the next member, if there is one.
392387 self ._init_read ()
393- self ._read_gzip_header ()
388+ if not self ._read_gzip_header ():
389+ return False
394390 self .decompress = zlib .decompressobj (- zlib .MAX_WBITS )
395391 self ._new_member = False
396392
@@ -407,7 +403,7 @@ def _read(self, size=1024):
407403 self .fileobj .prepend (self .decompress .unused_data , True )
408404 self ._read_eof ()
409405 self ._add_read_data ( uncompress )
410- raise EOFError ( 'Reached EOF' )
406+ return False
411407
412408 uncompress = self .decompress .decompress (buf )
413409 self ._add_read_data ( uncompress )
@@ -423,6 +419,7 @@ def _read(self, size=1024):
423419 # a new member on the next call
424420 self ._read_eof ()
425421 self ._new_member = True
422+ return True
426423
427424 def _add_read_data (self , data ):
428425 self .crc = zlib .crc32 (data , self .crc ) & 0xffffffff
@@ -437,8 +434,7 @@ def _read_eof(self):
437434 # We check the that the computed CRC and size of the
438435 # uncompressed data matches the stored values. Note that the size
439436 # stored is the true file size mod 2**32.
440- crc32 = read32 (self .fileobj )
441- isize = read32 (self .fileobj ) # may exceed 2GB
437+ crc32 , isize = struct .unpack ("<II" , self ._read_exact (8 ))
442438 if crc32 != self .crc :
443439 raise IOError ("CRC check failed %s != %s" % (hex (crc32 ),
444440 hex (self .crc )))
0 commit comments