@@ -33,6 +33,9 @@ def write32u(output, value):
3333 # or unsigned.
3434 output .write (struct .pack ("<L" , value ))
3535
def read32(input):
    """Read a little-endian unsigned 32-bit integer from *input*.

    *input* is any object with a ``read(n)`` method that returns bytes.
    Exactly four bytes are consumed on success.

    Raises IOError if the stream ends before four bytes are available.
    IOError is used rather than EOFError because the readers in this
    module catch EOFError as the ordinary end-of-data signal, and a
    truncated field must not be mistaken for a clean end of file.
    """
    data = input.read(4)
    # A single read() may legitimately return fewer bytes than requested
    # (e.g. unbuffered streams), so keep reading until we have all four
    # or the stream is exhausted.
    while len(data) < 4:
        more = input.read(4 - len(data))
        if not more:
            raise IOError("Compressed file ended before the "
                          "end-of-stream marker was reached")
        data += more
    return struct.unpack("<I", data)[0]
38+
3639def open (filename , mode = "rb" , compresslevel = 9 ):
3740 """Shorthand for GzipFile(filename, mode, compresslevel).
3841
@@ -256,32 +259,27 @@ def _init_read(self):
256259 self .crc = zlib .crc32 (b"" ) & 0xffffffff
257260 self .size = 0
258261
259- def _read_exact (self , n ):
260- data = self .fileobj .read (n )
261- while len (data ) < n :
262- b = self .fileobj .read (n - len (data ))
263- if not b :
264- raise EOFError ("Compressed file ended before the "
265- "end-of-stream marker was reached" )
266- data += b
267- return data
268-
269262 def _read_gzip_header (self ):
270263 magic = self .fileobj .read (2 )
271264 if magic == b'' :
272- return False
265+ raise EOFError ( "Reached EOF" )
273266
274267 if magic != b'\037 \213 ' :
275268 raise IOError ('Not a gzipped file' )
276-
277- method , flag , self .mtime = struct .unpack ("<BBIxx" , self ._read_exact (8 ))
269+ method = ord ( self .fileobj .read (1 ) )
278270 if method != 8 :
279271 raise IOError ('Unknown compression method' )
272+ flag = ord ( self .fileobj .read (1 ) )
273+ self .mtime = read32 (self .fileobj )
274+ # extraflag = self.fileobj.read(1)
275+ # os = self.fileobj.read(1)
276+ self .fileobj .read (2 )
280277
281278 if flag & FEXTRA :
282279 # Read & discard the extra field, if present
283- extra_len , = struct .unpack ("<H" , self ._read_exact (2 ))
284- self ._read_exact (extra_len )
280+ xlen = ord (self .fileobj .read (1 ))
281+ xlen = xlen + 256 * ord (self .fileobj .read (1 ))
282+ self .fileobj .read (xlen )
285283 if flag & FNAME :
286284 # Read and discard a null-terminated string containing the filename
287285 while True :
@@ -295,13 +293,12 @@ def _read_gzip_header(self):
295293 if not s or s == b'\000 ' :
296294 break
297295 if flag & FHCRC :
298- self ._read_exact (2 ) # Read & discard the 16-bit header CRC
296+ self .fileobj . read (2 ) # Read & discard the 16-bit header CRC
299297
300298 unused = self .fileobj .unused ()
301299 if unused :
302300 uncompress = self .decompress .decompress (unused )
303301 self ._add_read_data (uncompress )
304- return True
305302
306303 def write (self ,data ):
307304 self ._check_closed ()
@@ -335,16 +332,20 @@ def read(self, size=-1):
335332
336333 readsize = 1024
337334 if size < 0 : # get the whole thing
338- while self ._read (readsize ):
339- readsize = min (self .max_read_chunk , readsize * 2 )
340- size = self .extrasize
335+ try :
336+ while True :
337+ self ._read (readsize )
338+ readsize = min (self .max_read_chunk , readsize * 2 )
339+ except EOFError :
340+ size = self .extrasize
341341 else : # just get some more of it
342- while size > self .extrasize :
343- if not self ._read (readsize ):
344- if size > self .extrasize :
345- size = self .extrasize
346- break
347- readsize = min (self .max_read_chunk , readsize * 2 )
342+ try :
343+ while size > self .extrasize :
344+ self ._read (readsize )
345+ readsize = min (self .max_read_chunk , readsize * 2 )
346+ except EOFError :
347+ if size > self .extrasize :
348+ size = self .extrasize
348349
349350 offset = self .offset - self .extrastart
350351 chunk = self .extrabuf [offset : offset + size ]
@@ -365,9 +366,12 @@ def peek(self, n):
365366 if self .extrasize == 0 :
366367 if self .fileobj is None :
367368 return b''
368- # Ensure that we don't return b"" if we haven't reached EOF.
369- # 1024 is the same buffering heuristic used in read()
370- while self .extrasize == 0 and self ._read (max (n , 1024 )):
369+ try :
370+ # Ensure that we don't return b"" if we haven't reached EOF.
371+ while self .extrasize == 0 :
372+ # 1024 is the same buffering heuristic used in read()
373+ self ._read (max (n , 1024 ))
374+ except EOFError :
371375 pass
372376 offset = self .offset - self .extrastart
373377 remaining = self .extrasize
@@ -380,14 +384,13 @@ def _unread(self, buf):
380384
381385 def _read (self , size = 1024 ):
382386 if self .fileobj is None :
383- return False
387+ raise EOFError ( "Reached EOF" )
384388
385389 if self ._new_member :
386390 # If the _new_member flag is set, we have to
387391 # jump to the next member, if there is one.
388392 self ._init_read ()
389- if not self ._read_gzip_header ():
390- return False
393+ self ._read_gzip_header ()
391394 self .decompress = zlib .decompressobj (- zlib .MAX_WBITS )
392395 self ._new_member = False
393396
@@ -404,7 +407,7 @@ def _read(self, size=1024):
404407 self .fileobj .prepend (self .decompress .unused_data , True )
405408 self ._read_eof ()
406409 self ._add_read_data ( uncompress )
407- return False
410+ raise EOFError ( 'Reached EOF' )
408411
409412 uncompress = self .decompress .decompress (buf )
410413 self ._add_read_data ( uncompress )
@@ -420,7 +423,6 @@ def _read(self, size=1024):
420423 # a new member on the next call
421424 self ._read_eof ()
422425 self ._new_member = True
423- return True
424426
425427 def _add_read_data (self , data ):
426428 self .crc = zlib .crc32 (data , self .crc ) & 0xffffffff
@@ -435,7 +437,8 @@ def _read_eof(self):
435437 # We check that the computed CRC and size of the
436438 # uncompressed data matches the stored values. Note that the size
437439 # stored is the true file size mod 2**32.
438- crc32 , isize = struct .unpack ("<II" , self ._read_exact (8 ))
440+ crc32 = read32 (self .fileobj )
441+ isize = read32 (self .fileobj ) # may exceed 2GB
439442 if crc32 != self .crc :
440443 raise IOError ("CRC check failed %s != %s" % (hex (crc32 ),
441444 hex (self .crc )))
0 commit comments