Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 186370b

Browse files
committed
Issue #16034 follow-up: Apply optimizations to the lzma module.
1 parent 06eecea commit 186370b

2 files changed

Lines changed: 72 additions & 29 deletions

File tree

Lib/lzma.py

Lines changed: 70 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def __init__(self, filename=None, mode="r", *,
110110
# stream will need a separate decompressor object.
111111
self._init_args = {"format":format, "filters":filters}
112112
self._decompressor = LZMADecompressor(**self._init_args)
113-
self._buffer = None
113+
self._buffer = b""
114+
self._buffer_offset = 0
114115
elif mode in ("w", "wb", "a", "ab"):
115116
if format is None:
116117
format = FORMAT_XZ
@@ -143,7 +144,7 @@ def close(self):
143144
try:
144145
if self._mode in (_MODE_READ, _MODE_READ_EOF):
145146
self._decompressor = None
146-
self._buffer = None
147+
self._buffer = b""
147148
elif self._mode == _MODE_WRITE:
148149
self._fp.write(self._compressor.flush())
149150
self._compressor = None
@@ -187,15 +188,18 @@ def _check_not_closed(self):
187188
raise ValueError("I/O operation on closed file")
188189

189190
def _check_can_read(self):
190-
if not self.readable():
191+
if self._mode not in (_MODE_READ, _MODE_READ_EOF):
192+
self._check_not_closed()
191193
raise io.UnsupportedOperation("File not open for reading")
192194

193195
def _check_can_write(self):
194-
if not self.writable():
196+
if self._mode != _MODE_WRITE:
197+
self._check_not_closed()
195198
raise io.UnsupportedOperation("File not open for writing")
196199

197200
def _check_can_seek(self):
198-
if not self.readable():
201+
if self._mode not in (_MODE_READ, _MODE_READ_EOF):
202+
self._check_not_closed()
199203
raise io.UnsupportedOperation("Seeking is only supported "
200204
"on files open for reading")
201205
if not self._fp.seekable():
@@ -204,16 +208,13 @@ def _check_can_seek(self):
204208

205209
# Fill the readahead buffer if it is empty. Returns False on EOF.
206210
def _fill_buffer(self):
211+
if self._mode == _MODE_READ_EOF:
212+
return False
207213
# Depending on the input data, our call to the decompressor may not
208214
# return any data. In this case, try again after reading another block.
209-
while True:
210-
if self._buffer:
211-
return True
212-
213-
if self._decompressor.unused_data:
214-
rawblock = self._decompressor.unused_data
215-
else:
216-
rawblock = self._fp.read(_BUFFER_SIZE)
215+
while self._buffer_offset == len(self._buffer):
216+
rawblock = (self._decompressor.unused_data or
217+
self._fp.read(_BUFFER_SIZE))
217218

218219
if not rawblock:
219220
if self._decompressor.eof:
@@ -229,30 +230,48 @@ def _fill_buffer(self):
229230
self._decompressor = LZMADecompressor(**self._init_args)
230231

231232
self._buffer = self._decompressor.decompress(rawblock)
233+
self._buffer_offset = 0
234+
return True
232235

233236
# Read data until EOF.
234237
# If return_data is false, consume the data without returning it.
235238
def _read_all(self, return_data=True):
239+
# The loop assumes that _buffer_offset is 0. Ensure that this is true.
240+
self._buffer = self._buffer[self._buffer_offset:]
241+
self._buffer_offset = 0
242+
236243
blocks = []
237244
while self._fill_buffer():
238245
if return_data:
239246
blocks.append(self._buffer)
240247
self._pos += len(self._buffer)
241-
self._buffer = None
248+
self._buffer = b""
242249
if return_data:
243250
return b"".join(blocks)
244251

245252
# Read a block of up to n bytes.
246253
# If return_data is false, consume the data without returning it.
247254
def _read_block(self, n, return_data=True):
255+
# If we have enough data buffered, return immediately.
256+
end = self._buffer_offset + n
257+
if end <= len(self._buffer):
258+
data = self._buffer[self._buffer_offset : end]
259+
self._buffer_offset = end
260+
self._pos += len(data)
261+
return data if return_data else None
262+
263+
# The loop assumes that _buffer_offset is 0. Ensure that this is true.
264+
self._buffer = self._buffer[self._buffer_offset:]
265+
self._buffer_offset = 0
266+
248267
blocks = []
249268
while n > 0 and self._fill_buffer():
250269
if n < len(self._buffer):
251270
data = self._buffer[:n]
252-
self._buffer = self._buffer[n:]
271+
self._buffer_offset = n
253272
else:
254273
data = self._buffer
255-
self._buffer = None
274+
self._buffer = b""
256275
if return_data:
257276
blocks.append(data)
258277
self._pos += len(data)
@@ -267,9 +286,9 @@ def peek(self, size=-1):
267286
The exact number of bytes returned is unspecified.
268287
"""
269288
self._check_can_read()
270-
if self._mode == _MODE_READ_EOF or not self._fill_buffer():
289+
if not self._fill_buffer():
271290
return b""
272-
return self._buffer
291+
return self._buffer[self._buffer_offset:]
273292

274293
def read(self, size=-1):
275294
"""Read up to size uncompressed bytes from the file.
@@ -278,7 +297,7 @@ def read(self, size=-1):
278297
Returns b"" if the file is already at EOF.
279298
"""
280299
self._check_can_read()
281-
if self._mode == _MODE_READ_EOF or size == 0:
300+
if size == 0:
282301
return b""
283302
elif size < 0:
284303
return self._read_all()
@@ -295,18 +314,40 @@ def read1(self, size=-1):
295314
# this does not give enough data for the decompressor to make progress.
296315
# In this case we make multiple reads, to avoid returning b"".
297316
self._check_can_read()
298-
if (size == 0 or self._mode == _MODE_READ_EOF or
299-
not self._fill_buffer()):
317+
if (size == 0 or
318+
# Only call _fill_buffer() if the buffer is actually empty.
319+
# This gives a significant speedup if *size* is small.
320+
(self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
300321
return b""
301-
if 0 < size < len(self._buffer):
302-
data = self._buffer[:size]
303-
self._buffer = self._buffer[size:]
322+
if size > 0:
323+
data = self._buffer[self._buffer_offset :
324+
self._buffer_offset + size]
325+
self._buffer_offset += len(data)
304326
else:
305-
data = self._buffer
306-
self._buffer = None
327+
data = self._buffer[self._buffer_offset:]
328+
self._buffer = b""
329+
self._buffer_offset = 0
307330
self._pos += len(data)
308331
return data
309332

333+
def readline(self, size=-1):
334+
"""Read a line of uncompressed bytes from the file.
335+
336+
The terminating newline (if present) is retained. If size is
337+
non-negative, no more than size bytes will be read (in which
338+
case the line may be incomplete). Returns b'' if already at EOF.
339+
"""
340+
self._check_can_read()
341+
# Shortcut for the common case - the whole line is in the buffer.
342+
if size < 0:
343+
end = self._buffer.find(b"\n", self._buffer_offset) + 1
344+
if end > 0:
345+
line = self._buffer[self._buffer_offset : end]
346+
self._buffer_offset = end
347+
self._pos += len(line)
348+
return line
349+
return io.BufferedIOBase.readline(self, size)
350+
310351
def write(self, data):
311352
"""Write a bytes object to the file.
312353
@@ -326,7 +367,8 @@ def _rewind(self):
326367
self._mode = _MODE_READ
327368
self._pos = 0
328369
self._decompressor = LZMADecompressor(**self._init_args)
329-
self._buffer = None
370+
self._buffer = b""
371+
self._buffer_offset = 0
330372

331373
def seek(self, offset, whence=0):
332374
"""Change the file position.
@@ -365,8 +407,7 @@ def seek(self, offset, whence=0):
365407
offset -= self._pos
366408

367409
# Read and discard data until we reach the desired position.
368-
if self._mode != _MODE_READ_EOF:
369-
self._read_block(offset, return_data=False)
410+
self._read_block(offset, return_data=False)
370411

371412
return self._pos
372413

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ Library
6262
- Issue #12034: Fix bogus caching of result in check_GetFinalPathNameByHandle.
6363
Patch by Atsuo Ishimoto.
6464

65+
- Improve performance of `lzma.LZMAFile`.
66+
6567
- Issue #16220: wsgiref now always calls close() on an iterable response.
6668
Patch by Brent Tubbs.
6769

0 commit comments

Comments
 (0)