Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b3bd624

Browse files
committed
Back out patch for #1159051, which caused backwards compatibility problems.
1 parent 64949fa commit b3bd624

5 files changed

Lines changed: 44 additions & 82 deletions

File tree

Lib/gzip.py

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ def write32u(output, value):
6565
# or unsigned.
6666
output.write(struct.pack("<L", value))
6767

68+
def read32(input):
69+
return struct.unpack("<I", input.read(4))[0]
70+
6871
class _PaddedFile:
6972
"""Minimal read-only file object that prepends a string to the contents
7073
of an actual file. Shouldn't be used outside of gzip.py, as it lacks
@@ -278,32 +281,28 @@ def _init_read(self):
278281
self.crc = zlib.crc32(b"") & 0xffffffff
279282
self.size = 0
280283

281-
def _read_exact(self, n):
282-
data = self.fileobj.read(n)
283-
while len(data) < n:
284-
b = self.fileobj.read(n - len(data))
285-
if not b:
286-
raise EOFError("Compressed file ended before the "
287-
"end-of-stream marker was reached")
288-
data += b
289-
return data
290-
291284
def _read_gzip_header(self):
292285
magic = self.fileobj.read(2)
293286
if magic == b'':
294-
return False
287+
raise EOFError("Reached EOF")
295288

296289
if magic != b'\037\213':
297290
raise IOError('Not a gzipped file')
298291

299-
method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8))
292+
method = ord( self.fileobj.read(1) )
300293
if method != 8:
301294
raise IOError('Unknown compression method')
295+
flag = ord( self.fileobj.read(1) )
296+
self.mtime = read32(self.fileobj)
297+
# extraflag = self.fileobj.read(1)
298+
# os = self.fileobj.read(1)
299+
self.fileobj.read(2)
302300

303301
if flag & FEXTRA:
304302
# Read & discard the extra field, if present
305-
extra_len, = struct.unpack("<H", self._read_exact(2))
306-
self._read_exact(extra_len)
303+
xlen = ord(self.fileobj.read(1))
304+
xlen = xlen + 256*ord(self.fileobj.read(1))
305+
self.fileobj.read(xlen)
307306
if flag & FNAME:
308307
# Read and discard a null-terminated string containing the filename
309308
while True:
@@ -317,13 +316,12 @@ def _read_gzip_header(self):
317316
if not s or s==b'\000':
318317
break
319318
if flag & FHCRC:
320-
self._read_exact(2) # Read & discard the 16-bit header CRC
319+
self.fileobj.read(2) # Read & discard the 16-bit header CRC
321320

322321
unused = self.fileobj.unused()
323322
if unused:
324323
uncompress = self.decompress.decompress(unused)
325324
self._add_read_data(uncompress)
326-
return True
327325

328326
def write(self,data):
329327
self._check_closed()
@@ -357,16 +355,20 @@ def read(self, size=-1):
357355

358356
readsize = 1024
359357
if size < 0: # get the whole thing
360-
while self._read(readsize):
361-
readsize = min(self.max_read_chunk, readsize * 2)
362-
size = self.extrasize
358+
try:
359+
while True:
360+
self._read(readsize)
361+
readsize = min(self.max_read_chunk, readsize * 2)
362+
except EOFError:
363+
size = self.extrasize
363364
else: # just get some more of it
364-
while size > self.extrasize:
365-
if not self._read(readsize):
366-
if size > self.extrasize:
367-
size = self.extrasize
368-
break
369-
readsize = min(self.max_read_chunk, readsize * 2)
365+
try:
366+
while size > self.extrasize:
367+
self._read(readsize)
368+
readsize = min(self.max_read_chunk, readsize * 2)
369+
except EOFError:
370+
if size > self.extrasize:
371+
size = self.extrasize
370372

371373
offset = self.offset - self.extrastart
372374
chunk = self.extrabuf[offset: offset + size]
@@ -384,9 +386,12 @@ def read1(self, size=-1):
384386
if self.extrasize <= 0 and self.fileobj is None:
385387
return b''
386388

387-
# For certain input data, a single call to _read() may not return
388-
# any data. In this case, retry until we get some data or reach EOF.
389-
while self.extrasize <= 0 and self._read():
389+
try:
390+
# For certain input data, a single call to _read() may not return
391+
# any data. In this case, retry until we get some data or reach EOF.
392+
while self.extrasize <= 0:
393+
self._read()
394+
except EOFError:
390395
pass
391396
if size < 0 or size > self.extrasize:
392397
size = self.extrasize
@@ -409,9 +414,12 @@ def peek(self, n):
409414
if self.extrasize == 0:
410415
if self.fileobj is None:
411416
return b''
412-
# Ensure that we don't return b"" if we haven't reached EOF.
413-
# 1024 is the same buffering heuristic used in read()
414-
while self.extrasize == 0 and self._read(max(n, 1024)):
417+
try:
418+
# Ensure that we don't return b"" if we haven't reached EOF.
419+
while self.extrasize == 0:
420+
# 1024 is the same buffering heuristic used in read()
421+
self._read(max(n, 1024))
422+
except EOFError:
415423
pass
416424
offset = self.offset - self.extrastart
417425
remaining = self.extrasize
@@ -424,14 +432,13 @@ def _unread(self, buf):
424432

425433
def _read(self, size=1024):
426434
if self.fileobj is None:
427-
return False
435+
raise EOFError("Reached EOF")
428436

429437
if self._new_member:
430438
# If the _new_member flag is set, we have to
431439
# jump to the next member, if there is one.
432440
self._init_read()
433-
if not self._read_gzip_header():
434-
return False
441+
self._read_gzip_header()
435442
self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
436443
self._new_member = False
437444

@@ -448,7 +455,7 @@ def _read(self, size=1024):
448455
self.fileobj.prepend(self.decompress.unused_data, True)
449456
self._read_eof()
450457
self._add_read_data( uncompress )
451-
return False
458+
raise EOFError('Reached EOF')
452459

453460
uncompress = self.decompress.decompress(buf)
454461
self._add_read_data( uncompress )
@@ -464,7 +471,6 @@ def _read(self, size=1024):
464471
# a new member on the next call
465472
self._read_eof()
466473
self._new_member = True
467-
return True
468474

469475
def _add_read_data(self, data):
470476
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
@@ -479,7 +485,8 @@ def _read_eof(self):
479485
# We check the that the computed CRC and size of the
480486
# uncompressed data matches the stored values. Note that the size
481487
# stored is the true file size mod 2**32.
482-
crc32, isize = struct.unpack("<II", self._read_exact(8))
488+
crc32 = read32(self.fileobj)
489+
isize = read32(self.fileobj) # may exceed 2GB
483490
if crc32 != self.crc:
484491
raise IOError("CRC check failed %s != %s" % (hex(crc32),
485492
hex(self.crc)))

Lib/test/test_bz2.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -577,20 +577,6 @@ def testSeekBackwardsBytesIO(self):
577577
bz2f.seek(-150, 1)
578578
self.assertEqual(bz2f.read(), self.TEXT[500-150:])
579579

580-
def test_read_truncated(self):
581-
# Drop the eos_magic field (6 bytes) and CRC (4 bytes).
582-
truncated = self.DATA[:-10]
583-
with BZ2File(BytesIO(truncated)) as f:
584-
self.assertRaises(EOFError, f.read)
585-
with BZ2File(BytesIO(truncated)) as f:
586-
self.assertEqual(f.read(len(self.TEXT)), self.TEXT)
587-
self.assertRaises(EOFError, f.read, 1)
588-
# Incomplete 4-byte file header, and block header of at least 146 bits.
589-
for i in range(22):
590-
with BZ2File(BytesIO(truncated[:i])) as f:
591-
self.assertRaises(EOFError, f.read, 1)
592-
593-
594580
class BZ2CompressorTest(BaseTest):
595581
def testCompress(self):
596582
bz2c = BZ2Compressor()

Lib/test/test_gzip.py

100644100755
Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -389,20 +389,6 @@ def test_decompress(self):
389389
datac = gzip.compress(data)
390390
self.assertEqual(gzip.decompress(datac), data)
391391

392-
def test_read_truncated(self):
393-
data = data1*50
394-
# Drop the CRC (4 bytes) and file size (4 bytes).
395-
truncated = gzip.compress(data)[:-8]
396-
with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
397-
self.assertRaises(EOFError, f.read)
398-
with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
399-
self.assertEqual(f.read(len(data)), data)
400-
self.assertRaises(EOFError, f.read, 1)
401-
# Incomplete 10-byte header.
402-
for i in range(2, 10):
403-
with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f:
404-
self.assertRaises(EOFError, f.read, 1)
405-
406392
def test_read_with_extra(self):
407393
# Gzip data with an extra field
408394
gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff'

Lib/test/test_lzma.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -669,20 +669,6 @@ def test_read_incomplete(self):
669669
with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f:
670670
self.assertRaises(EOFError, f.read)
671671

672-
def test_read_truncated(self):
673-
# Drop stream footer: CRC (4 bytes), index size (4 bytes),
674-
# flags (2 bytes) and magic number (2 bytes).
675-
truncated = COMPRESSED_XZ[:-12]
676-
with LZMAFile(BytesIO(truncated)) as f:
677-
self.assertRaises(EOFError, f.read)
678-
with LZMAFile(BytesIO(truncated)) as f:
679-
self.assertEqual(f.read(len(INPUT)), INPUT)
680-
self.assertRaises(EOFError, f.read, 1)
681-
# Incomplete 12-byte header.
682-
for i in range(12):
683-
with LZMAFile(BytesIO(truncated[:i])) as f:
684-
self.assertRaises(EOFError, f.read, 1)
685-
686672
def test_read_bad_args(self):
687673
f = LZMAFile(BytesIO(COMPRESSED_XZ))
688674
f.close()

Misc/NEWS

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -628,9 +628,6 @@ Library
628628
current directory on Unix and no longer searches a relative file path with
629629
a directory part in PATH directories. Patch by Thomas Kluyver.
630630

631-
- Issue #1159051: GzipFile now raises EOFError when reading a corrupted file
632-
with truncated header or footer.
633-
634631
- Issue #16993: shutil.which() now preserves the case of the path and extension
635632
on Windows.
636633

0 commit comments

Comments
 (0)