@@ -174,29 +174,31 @@ def _check_can_seek(self):
174174
175175 # Fill the readahead buffer if it is empty. Returns False on EOF.
176176 def _fill_buffer (self ):
177- if self ._buffer :
178- return True
179-
180- if self ._decompressor .unused_data :
181- rawblock = self ._decompressor .unused_data
182- else :
183- rawblock = self ._fp .read (_BUFFER_SIZE )
184-
185- if not rawblock :
186- if self ._decompressor .eof :
187- self ._mode = _MODE_READ_EOF
188- self ._size = self ._pos
189- return False
177+ # Depending on the input data, our call to the decompressor may not
178+ # return any data. In this case, try again after reading another block.
179+ while True :
180+ if self ._buffer :
181+ return True
182+
183+ if self ._decompressor .unused_data :
184+ rawblock = self ._decompressor .unused_data
190185 else :
191- raise EOFError ("Compressed file ended before the "
192- "end-of-stream marker was reached" )
193-
194- # Continue to next stream.
195- if self ._decompressor .eof :
196- self ._decompressor = BZ2Decompressor ()
186+ rawblock = self ._fp .read (_BUFFER_SIZE )
187+
188+ if not rawblock :
189+ if self ._decompressor .eof :
190+ self ._mode = _MODE_READ_EOF
191+ self ._size = self ._pos
192+ return False
193+ else :
194+ raise EOFError ("Compressed file ended before the "
195+ "end-of-stream marker was reached" )
196+
197+ # Continue to next stream.
198+ if self ._decompressor .eof :
199+ self ._decompressor = BZ2Decompressor ()
197200
198- self ._buffer = self ._decompressor .decompress (rawblock )
199- return True
201+ self ._buffer = self ._decompressor .decompress (rawblock )
200202
201203 # Read data until EOF.
202204 # If return_data is false, consume the data without returning it.
@@ -256,11 +258,14 @@ def read(self, size=-1):
256258 return self ._read_block (size )
257259
258260 def read1 (self , size = - 1 ):
259- """Read up to size uncompressed bytes with at most one read
260- from the underlying stream.
261+ """Read up to size uncompressed bytes, while trying to avoid
262+ making multiple reads from the underlying stream.
261263
262264 Returns b'' if the file is at EOF.
263265 """
266+ # Usually, read1() calls _fp.read() at most once. However, sometimes
267+ # this does not give enough data for the decompressor to make progress.
268+ # In this case we make multiple reads, to avoid returning b"".
264269 with self ._lock :
265270 self ._check_can_read ()
266271 if (size == 0 or self ._mode == _MODE_READ_EOF or
0 commit comments