@@ -65,6 +65,9 @@ def write32u(output, value):
6565 # or unsigned.
6666 output .write (struct .pack ("<L" , value ))
6767
68+ def read32 (input ):
69+ return struct .unpack ("<I" , input .read (4 ))[0 ]
70+
6871class _PaddedFile :
6972 """Minimal read-only file object that prepends a string to the contents
7073 of an actual file. Shouldn't be used outside of gzip.py, as it lacks
@@ -278,32 +281,28 @@ def _init_read(self):
278281 self .crc = zlib .crc32 (b"" ) & 0xffffffff
279282 self .size = 0
280283
281- def _read_exact (self , n ):
282- data = self .fileobj .read (n )
283- while len (data ) < n :
284- b = self .fileobj .read (n - len (data ))
285- if not b :
286- raise EOFError ("Compressed file ended before the "
287- "end-of-stream marker was reached" )
288- data += b
289- return data
290-
291284 def _read_gzip_header (self ):
292285 magic = self .fileobj .read (2 )
293286 if magic == b'' :
294- return False
287+ raise EOFError ( "Reached EOF" )
295288
296289 if magic != b'\037 \213 ' :
297290 raise IOError ('Not a gzipped file' )
298291
299- method , flag , self . mtime = struct . unpack ( "<BBIxx" , self ._read_exact ( 8 ) )
292+ method = ord ( self .fileobj . read ( 1 ) )
300293 if method != 8 :
301294 raise IOError ('Unknown compression method' )
295+ flag = ord ( self .fileobj .read (1 ) )
296+ self .mtime = read32 (self .fileobj )
297+ # extraflag = self.fileobj.read(1)
298+ # os = self.fileobj.read(1)
299+ self .fileobj .read (2 )
302300
303301 if flag & FEXTRA :
304302 # Read & discard the extra field, if present
305- extra_len , = struct .unpack ("<H" , self ._read_exact (2 ))
306- self ._read_exact (extra_len )
303+ xlen = ord (self .fileobj .read (1 ))
304+ xlen = xlen + 256 * ord (self .fileobj .read (1 ))
305+ self .fileobj .read (xlen )
307306 if flag & FNAME :
308307 # Read and discard a null-terminated string containing the filename
309308 while True :
@@ -317,13 +316,12 @@ def _read_gzip_header(self):
317316 if not s or s == b'\000 ' :
318317 break
319318 if flag & FHCRC :
320- self ._read_exact (2 ) # Read & discard the 16-bit header CRC
319+ self .fileobj . read (2 ) # Read & discard the 16-bit header CRC
321320
322321 unused = self .fileobj .unused ()
323322 if unused :
324323 uncompress = self .decompress .decompress (unused )
325324 self ._add_read_data (uncompress )
326- return True
327325
328326 def write (self ,data ):
329327 self ._check_closed ()
@@ -357,16 +355,20 @@ def read(self, size=-1):
357355
358356 readsize = 1024
359357 if size < 0 : # get the whole thing
360- while self ._read (readsize ):
361- readsize = min (self .max_read_chunk , readsize * 2 )
362- size = self .extrasize
358+ try :
359+ while True :
360+ self ._read (readsize )
361+ readsize = min (self .max_read_chunk , readsize * 2 )
362+ except EOFError :
363+ size = self .extrasize
363364 else : # just get some more of it
364- while size > self .extrasize :
365- if not self ._read (readsize ):
366- if size > self .extrasize :
367- size = self .extrasize
368- break
369- readsize = min (self .max_read_chunk , readsize * 2 )
365+ try :
366+ while size > self .extrasize :
367+ self ._read (readsize )
368+ readsize = min (self .max_read_chunk , readsize * 2 )
369+ except EOFError :
370+ if size > self .extrasize :
371+ size = self .extrasize
370372
371373 offset = self .offset - self .extrastart
372374 chunk = self .extrabuf [offset : offset + size ]
@@ -384,9 +386,12 @@ def read1(self, size=-1):
384386 if self .extrasize <= 0 and self .fileobj is None :
385387 return b''
386388
387- # For certain input data, a single call to _read() may not return
388- # any data. In this case, retry until we get some data or reach EOF.
389- while self .extrasize <= 0 and self ._read ():
389+ try :
390+ # For certain input data, a single call to _read() may not return
391+ # any data. In this case, retry until we get some data or reach EOF.
392+ while self .extrasize <= 0 :
393+ self ._read ()
394+ except EOFError :
390395 pass
391396 if size < 0 or size > self .extrasize :
392397 size = self .extrasize
@@ -409,9 +414,12 @@ def peek(self, n):
409414 if self .extrasize == 0 :
410415 if self .fileobj is None :
411416 return b''
412- # Ensure that we don't return b"" if we haven't reached EOF.
413- # 1024 is the same buffering heuristic used in read()
414- while self .extrasize == 0 and self ._read (max (n , 1024 )):
417+ try :
418+ # Ensure that we don't return b"" if we haven't reached EOF.
419+ while self .extrasize == 0 :
420+ # 1024 is the same buffering heuristic used in read()
421+ self ._read (max (n , 1024 ))
422+ except EOFError :
415423 pass
416424 offset = self .offset - self .extrastart
417425 remaining = self .extrasize
@@ -424,14 +432,13 @@ def _unread(self, buf):
424432
425433 def _read (self , size = 1024 ):
426434 if self .fileobj is None :
427- return False
435+ raise EOFError ( "Reached EOF" )
428436
429437 if self ._new_member :
430438 # If the _new_member flag is set, we have to
431439 # jump to the next member, if there is one.
432440 self ._init_read ()
433- if not self ._read_gzip_header ():
434- return False
441+ self ._read_gzip_header ()
435442 self .decompress = zlib .decompressobj (- zlib .MAX_WBITS )
436443 self ._new_member = False
437444
@@ -448,7 +455,7 @@ def _read(self, size=1024):
448455 self .fileobj .prepend (self .decompress .unused_data , True )
449456 self ._read_eof ()
450457 self ._add_read_data ( uncompress )
451- return False
458+ raise EOFError ( 'Reached EOF' )
452459
453460 uncompress = self .decompress .decompress (buf )
454461 self ._add_read_data ( uncompress )
@@ -464,7 +471,6 @@ def _read(self, size=1024):
464471 # a new member on the next call
465472 self ._read_eof ()
466473 self ._new_member = True
467- return True
468474
469475 def _add_read_data (self , data ):
470476 self .crc = zlib .crc32 (data , self .crc ) & 0xffffffff
@@ -479,7 +485,8 @@ def _read_eof(self):
479485 # We check the that the computed CRC and size of the
480486 # uncompressed data matches the stored values. Note that the size
481487 # stored is the true file size mod 2**32.
482- crc32 , isize = struct .unpack ("<II" , self ._read_exact (8 ))
488+ crc32 = read32 (self .fileobj )
489+ isize = read32 (self .fileobj ) # may exceed 2GB
483490 if crc32 != self .crc :
484491 raise IOError ("CRC check failed %s != %s" % (hex (crc32 ),
485492 hex (self .crc )))
0 commit comments