@@ -110,7 +110,8 @@ def __init__(self, filename=None, mode="r", *,
110110 # stream will need a separate decompressor object.
111111 self ._init_args = {"format" :format , "filters" :filters }
112112 self ._decompressor = LZMADecompressor (** self ._init_args )
113- self ._buffer = None
113+ self ._buffer = b""
114+ self ._buffer_offset = 0
114115 elif mode in ("w" , "wb" , "a" , "ab" ):
115116 if format is None :
116117 format = FORMAT_XZ
@@ -143,7 +144,7 @@ def close(self):
143144 try :
144145 if self ._mode in (_MODE_READ , _MODE_READ_EOF ):
145146 self ._decompressor = None
146- self ._buffer = None
147+ self ._buffer = b""
147148 elif self ._mode == _MODE_WRITE :
148149 self ._fp .write (self ._compressor .flush ())
149150 self ._compressor = None
@@ -187,15 +188,18 @@ def _check_not_closed(self):
187188 raise ValueError ("I/O operation on closed file" )
188189
189190 def _check_can_read (self ):
190- if not self .readable ():
191+ if self ._mode not in (_MODE_READ , _MODE_READ_EOF ):
192+ self ._check_not_closed ()
191193 raise io .UnsupportedOperation ("File not open for reading" )
192194
193195 def _check_can_write (self ):
194- if not self .writable ():
196+ if self ._mode != _MODE_WRITE :
197+ self ._check_not_closed ()
195198 raise io .UnsupportedOperation ("File not open for writing" )
196199
197200 def _check_can_seek (self ):
198- if not self .readable ():
201+ if self ._mode not in (_MODE_READ , _MODE_READ_EOF ):
202+ self ._check_not_closed ()
199203 raise io .UnsupportedOperation ("Seeking is only supported "
200204 "on files open for reading" )
201205 if not self ._fp .seekable ():
@@ -204,16 +208,13 @@ def _check_can_seek(self):
204208
205209 # Fill the readahead buffer if it is empty. Returns False on EOF.
206210 def _fill_buffer (self ):
211+ if self ._mode == _MODE_READ_EOF :
212+ return False
207213 # Depending on the input data, our call to the decompressor may not
208214 # return any data. In this case, try again after reading another block.
209- while True :
210- if self ._buffer :
211- return True
212-
213- if self ._decompressor .unused_data :
214- rawblock = self ._decompressor .unused_data
215- else :
216- rawblock = self ._fp .read (_BUFFER_SIZE )
215+ while self ._buffer_offset == len (self ._buffer ):
216+ rawblock = (self ._decompressor .unused_data or
217+ self ._fp .read (_BUFFER_SIZE ))
217218
218219 if not rawblock :
219220 if self ._decompressor .eof :
@@ -229,30 +230,48 @@ def _fill_buffer(self):
229230 self ._decompressor = LZMADecompressor (** self ._init_args )
230231
231232 self ._buffer = self ._decompressor .decompress (rawblock )
233+ self ._buffer_offset = 0
234+ return True
232235
233236 # Read data until EOF.
234237 # If return_data is false, consume the data without returning it.
235238 def _read_all (self , return_data = True ):
239+ # The loop assumes that _buffer_offset is 0. Ensure that this is true.
240+ self ._buffer = self ._buffer [self ._buffer_offset :]
241+ self ._buffer_offset = 0
242+
236243 blocks = []
237244 while self ._fill_buffer ():
238245 if return_data :
239246 blocks .append (self ._buffer )
240247 self ._pos += len (self ._buffer )
241- self ._buffer = None
248+ self ._buffer = b""
242249 if return_data :
243250 return b"" .join (blocks )
244251
245252 # Read a block of up to n bytes.
246253 # If return_data is false, consume the data without returning it.
247254 def _read_block (self , n , return_data = True ):
255+ # If we have enough data buffered, return immediately.
256+ end = self ._buffer_offset + n
257+ if end <= len (self ._buffer ):
258+ data = self ._buffer [self ._buffer_offset : end ]
259+ self ._buffer_offset = end
260+ self ._pos += len (data )
261+ return data if return_data else None
262+
263+ # The loop assumes that _buffer_offset is 0. Ensure that this is true.
264+ self ._buffer = self ._buffer [self ._buffer_offset :]
265+ self ._buffer_offset = 0
266+
248267 blocks = []
249268 while n > 0 and self ._fill_buffer ():
250269 if n < len (self ._buffer ):
251270 data = self ._buffer [:n ]
252- self ._buffer = self . _buffer [ n :]
271+ self ._buffer_offset = n
253272 else :
254273 data = self ._buffer
255- self ._buffer = None
274+ self ._buffer = b""
256275 if return_data :
257276 blocks .append (data )
258277 self ._pos += len (data )
@@ -267,9 +286,9 @@ def peek(self, size=-1):
267286 The exact number of bytes returned is unspecified.
268287 """
269288 self ._check_can_read ()
270- if self . _mode == _MODE_READ_EOF or not self ._fill_buffer ():
289+ if not self ._fill_buffer ():
271290 return b""
272- return self ._buffer
291+ return self ._buffer [ self . _buffer_offset :]
273292
274293 def read (self , size = - 1 ):
275294 """Read up to size uncompressed bytes from the file.
@@ -278,7 +297,7 @@ def read(self, size=-1):
278297 Returns b"" if the file is already at EOF.
279298 """
280299 self ._check_can_read ()
281- if self . _mode == _MODE_READ_EOF or size == 0 :
300+ if size == 0 :
282301 return b""
283302 elif size < 0 :
284303 return self ._read_all ()
@@ -295,18 +314,40 @@ def read1(self, size=-1):
295314 # this does not give enough data for the decompressor to make progress.
296315 # In this case we make multiple reads, to avoid returning b"".
297316 self ._check_can_read ()
298- if (size == 0 or self ._mode == _MODE_READ_EOF or
299- not self ._fill_buffer ()):
317+ if (size == 0 or
318+ # Only call _fill_buffer() if the buffer is actually empty.
319+ # This gives a significant speedup if *size* is small.
320+ (self ._buffer_offset == len (self ._buffer ) and not self ._fill_buffer ())):
300321 return b""
301- if 0 < size < len (self ._buffer ):
302- data = self ._buffer [:size ]
303- self ._buffer = self ._buffer [size :]
322+ if size > 0 :
323+ data = self ._buffer [self ._buffer_offset :
324+ self ._buffer_offset + size ]
325+ self ._buffer_offset += len (data )
304326 else :
305- data = self ._buffer
306- self ._buffer = None
327+ data = self ._buffer [self ._buffer_offset :]
328+ self ._buffer = b""
329+ self ._buffer_offset = 0
307330 self ._pos += len (data )
308331 return data
309332
333+ def readline (self , size = - 1 ):
334+ """Read a line of uncompressed bytes from the file.
335+
336+ The terminating newline (if present) is retained. If size is
337+ non-negative, no more than size bytes will be read (in which
338+ case the line may be incomplete). Returns b'' if already at EOF.
339+ """
340+ self ._check_can_read ()
341+ # Shortcut for the common case - the whole line is in the buffer.
342+ if size < 0 :
343+ end = self ._buffer .find (b"\n " , self ._buffer_offset ) + 1
344+ if end > 0 :
345+ line = self ._buffer [self ._buffer_offset : end ]
346+ self ._buffer_offset = end
347+ self ._pos += len (line )
348+ return line
349+ return io .BufferedIOBase .readline (self , size )
350+
310351 def write (self , data ):
311352 """Write a bytes object to the file.
312353
@@ -326,7 +367,8 @@ def _rewind(self):
326367 self ._mode = _MODE_READ
327368 self ._pos = 0
328369 self ._decompressor = LZMADecompressor (** self ._init_args )
329- self ._buffer = None
370+ self ._buffer = b""
371+ self ._buffer_offset = 0
330372
331373 def seek (self , offset , whence = 0 ):
332374 """Change the file position.
@@ -365,8 +407,7 @@ def seek(self, offset, whence=0):
365407 offset -= self ._pos
366408
367409 # Read and discard data until we reach the desired position.
368- if self ._mode != _MODE_READ_EOF :
369- self ._read_block (offset , return_data = False )
410+ self ._read_block (offset , return_data = False )
370411
371412 return self ._pos
372413
0 commit comments