@@ -204,29 +204,31 @@ def _check_can_seek(self):
204204
205205 # Fill the readahead buffer if it is empty. Returns False on EOF.
206206 def _fill_buffer (self ):
207- if self ._buffer :
208- return True
209-
210- if self ._decompressor .unused_data :
211- rawblock = self ._decompressor .unused_data
212- else :
213- rawblock = self ._fp .read (_BUFFER_SIZE )
214-
215- if not rawblock :
216- if self ._decompressor .eof :
217- self ._mode = _MODE_READ_EOF
218- self ._size = self ._pos
219- return False
207+ # Depending on the input data, our call to the decompressor may not
208+ # return any data. In this case, try again after reading another block.
209+ while True :
210+ if self ._buffer :
211+ return True
212+
213+ if self ._decompressor .unused_data :
214+ rawblock = self ._decompressor .unused_data
220215 else :
221- raise EOFError ("Compressed file ended before the "
222- "end-of-stream marker was reached" )
223-
224- # Continue to next stream.
225- if self ._decompressor .eof :
226- self ._decompressor = LZMADecompressor (** self ._init_args )
216+ rawblock = self ._fp .read (_BUFFER_SIZE )
217+
218+ if not rawblock :
219+ if self ._decompressor .eof :
220+ self ._mode = _MODE_READ_EOF
221+ self ._size = self ._pos
222+ return False
223+ else :
224+ raise EOFError ("Compressed file ended before the "
225+ "end-of-stream marker was reached" )
226+
227+ # Continue to next stream.
228+ if self ._decompressor .eof :
229+ self ._decompressor = LZMADecompressor (** self ._init_args )
227230
228- self ._buffer = self ._decompressor .decompress (rawblock )
229- return True
231+ self ._buffer = self ._decompressor .decompress (rawblock )
230232
231233 # Read data until EOF.
232234 # If return_data is false, consume the data without returning it.
@@ -284,11 +286,14 @@ def read(self, size=-1):
284286 return self ._read_block (size )
285287
286288 def read1 (self , size = - 1 ):
287- """Read up to size uncompressed bytes with at most one read
288- from the underlying stream.
289+ """Read up to size uncompressed bytes, while trying to avoid
290+ making multiple reads from the underlying stream.
289291
290292 Returns b"" if the file is at EOF.
291293 """
294+ # Usually, read1() calls _fp.read() at most once. However, sometimes
295+ # this does not give enough data for the decompressor to make progress.
296+ # In this case we make multiple reads, to avoid returning b"".
292297 self ._check_can_read ()
293298 if (size == 0 or self ._mode == _MODE_READ_EOF or
294299 not self ._fill_buffer ()):
0 commit comments