Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 37d3ff1

Browse files
committed
#15546: Fix {GzipFile,LZMAFile}.read1()'s handling of pathological input data.
1 parent 9c92a69 commit 37d3ff1

3 files changed

Lines changed: 35 additions & 24 deletions

File tree

Lib/gzip.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,10 @@ def read1(self, size=-1):
385385
return b''
386386

387387
try:
388-
self._read()
388+
# For certain input data, a single call to _read() may not return
389+
# any data. In this case, retry until we get some data or reach EOF.
390+
while self.extrasize <= 0:
391+
self._read()
389392
except EOFError:
390393
pass
391394
if size < 0 or size > self.extrasize:

Lib/lzma.py

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -204,29 +204,31 @@ def _check_can_seek(self):
204204

205205
# Fill the readahead buffer if it is empty. Returns False on EOF.
206206
def _fill_buffer(self):
207-
if self._buffer:
208-
return True
209-
210-
if self._decompressor.unused_data:
211-
rawblock = self._decompressor.unused_data
212-
else:
213-
rawblock = self._fp.read(_BUFFER_SIZE)
214-
215-
if not rawblock:
216-
if self._decompressor.eof:
217-
self._mode = _MODE_READ_EOF
218-
self._size = self._pos
219-
return False
207+
# Depending on the input data, our call to the decompressor may not
208+
# return any data. In this case, try again after reading another block.
209+
while True:
210+
if self._buffer:
211+
return True
212+
213+
if self._decompressor.unused_data:
214+
rawblock = self._decompressor.unused_data
220215
else:
221-
raise EOFError("Compressed file ended before the "
222-
"end-of-stream marker was reached")
223-
224-
# Continue to next stream.
225-
if self._decompressor.eof:
226-
self._decompressor = LZMADecompressor(**self._init_args)
216+
rawblock = self._fp.read(_BUFFER_SIZE)
217+
218+
if not rawblock:
219+
if self._decompressor.eof:
220+
self._mode = _MODE_READ_EOF
221+
self._size = self._pos
222+
return False
223+
else:
224+
raise EOFError("Compressed file ended before the "
225+
"end-of-stream marker was reached")
226+
227+
# Continue to next stream.
228+
if self._decompressor.eof:
229+
self._decompressor = LZMADecompressor(**self._init_args)
227230

228-
self._buffer = self._decompressor.decompress(rawblock)
229-
return True
231+
self._buffer = self._decompressor.decompress(rawblock)
230232

231233
# Read data until EOF.
232234
# If return_data is false, consume the data without returning it.
@@ -284,11 +286,14 @@ def read(self, size=-1):
284286
return self._read_block(size)
285287

286288
def read1(self, size=-1):
287-
"""Read up to size uncompressed bytes with at most one read
288-
from the underlying stream.
289+
"""Read up to size uncompressed bytes, while trying to avoid
290+
making multiple reads from the underlying stream.
289291
290292
Returns b"" if the file is at EOF.
291293
"""
294+
# Usually, read1() calls _fp.read() at most once. However, sometimes
295+
# this does not give enough data for the decompressor to make progress.
296+
# In this case we make multiple reads, to avoid returning b"".
292297
self._check_can_read()
293298
if (size == 0 or self._mode == _MODE_READ_EOF or
294299
not self._fill_buffer()):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ Core and Builtins
7777
Library
7878
-------
7979

80+
- Issue #15546: Fix handling of pathological input data in the read1() method of
81+
the BZ2File, GzipFile and LZMAFile classes.
82+
8083
- Issue #13052: Fix IDLE crashing when replace string in Search/Replace dialog
8184
ended with '\'. Patch by Roger Serwy.
8285

0 commit comments

Comments
 (0)