Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b1f8835

Browse files
committed
Merged revisions 77288 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r77288 | antoine.pitrou | 2010-01-03 23:29:56 +0100 (dim., 03 janv. 2010) | 5 lines Issue #7471: Improve the performance of GzipFile's buffering mechanism, and make it implement the `io.BufferedIOBase` ABC to allow for further speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides. ........
1 parent a81d881 commit b1f8835

3 files changed

Lines changed: 58 additions & 58 deletions

File tree

Lib/gzip.py

Lines changed: 42 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import struct, sys, time, os
99
import zlib
1010
import builtins
11+
import io
1112

1213
__all__ = ["GzipFile","open"]
1314

@@ -44,7 +45,7 @@ def open(filename, mode="rb", compresslevel=9):
4445
"""
4546
return GzipFile(filename, mode, compresslevel)
4647

47-
class GzipFile:
48+
class GzipFile(io.BufferedIOBase):
4849
"""The GzipFile class simulates most of the methods of a file object with
4950
the exception of the readinto() and truncate() methods.
5051
@@ -109,8 +110,12 @@ def __init__(self, filename=None, mode=None,
109110
self.mode = READ
110111
# Set flag indicating start of a new member
111112
self._new_member = True
113+
# Buffer data read from gzip file. extrastart is offset in
114+
# stream where buffer starts. extrasize is number of
115+
# bytes remaining in buffer from current stream position.
112116
self.extrabuf = b""
113117
self.extrasize = 0
118+
self.extrastart = 0
114119
self.name = filename
115120
# Starts small, scales exponentially
116121
self.min_readsize = 100
@@ -214,20 +219,26 @@ def _read_gzip_header(self):
214219
if flag & FHCRC:
215220
self.fileobj.read(2) # Read & discard the 16-bit header CRC
216221

217-
218222
def write(self,data):
219223
if self.mode != WRITE:
220224
import errno
221225
raise IOError(errno.EBADF, "write() on read-only GzipFile object")
222226

223227
if self.fileobj is None:
224228
raise ValueError("write() on closed GzipFile object")
229+
230+
# Convert data type if called by io.BufferedWriter.
231+
if isinstance(data, memoryview):
232+
data = data.tobytes()
233+
225234
if len(data) > 0:
226235
self.size = self.size + len(data)
227236
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
228237
self.fileobj.write( self.compress.compress(data) )
229238
self.offset += len(data)
230239

240+
return len(data)
241+
231242
def read(self, size=-1):
232243
if self.mode != READ:
233244
import errno
@@ -253,15 +264,14 @@ def read(self, size=-1):
253264
if size > self.extrasize:
254265
size = self.extrasize
255266

256-
chunk = self.extrabuf[:size]
257-
self.extrabuf = self.extrabuf[size:]
267+
offset = self.offset - self.extrastart
268+
chunk = self.extrabuf[offset: offset + size]
258269
self.extrasize = self.extrasize - size
259270

260271
self.offset += size
261272
return chunk
262273

263274
def _unread(self, buf):
264-
self.extrabuf = buf + self.extrabuf
265275
self.extrasize = len(buf) + self.extrasize
266276
self.offset -= len(buf)
267277

@@ -317,8 +327,10 @@ def _read(self, size=1024):
317327

318328
def _add_read_data(self, data):
319329
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
320-
self.extrabuf = self.extrabuf + data
330+
offset = self.offset - self.extrastart
331+
self.extrabuf = self.extrabuf[offset:] + data
321332
self.extrasize = self.extrasize + len(data)
333+
self.extrastart = self.offset
322334
self.size = self.size + len(data)
323335

324336
def _read_eof(self):
@@ -336,6 +348,10 @@ def _read_eof(self):
336348
elif isize != (self.size & 0xffffffff):
337349
raise IOError("Incorrect length of data produced")
338350

351+
@property
352+
def closed(self):
353+
return self.fileobj is None
354+
339355
def close(self):
340356
if self.fileobj is None:
341357
return
@@ -351,15 +367,6 @@ def close(self):
351367
self.myfileobj.close()
352368
self.myfileobj = None
353369

354-
def __del__(self):
355-
try:
356-
if (self.myfileobj is None and
357-
self.fileobj is None):
358-
return
359-
except AttributeError:
360-
return
361-
self.close()
362-
363370
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
364371
if self.mode == WRITE:
365372
# Ensure the compressor's buffer is flushed
@@ -374,12 +381,6 @@ def fileno(self):
374381
"""
375382
return self.fileobj.fileno()
376383

377-
def isatty(self):
378-
return False
379-
380-
def tell(self):
381-
return self.offset
382-
383384
def rewind(self):
384385
'''Return the uncompressed stream file position indicator to the
385386
beginning of the file'''
@@ -389,8 +390,18 @@ def rewind(self):
389390
self._new_member = True
390391
self.extrabuf = b""
391392
self.extrasize = 0
393+
self.extrastart = 0
392394
self.offset = 0
393395

396+
def readable(self):
397+
return self.mode == READ
398+
399+
def writable(self):
400+
return self.mode == WRITE
401+
402+
def seekable(self):
403+
return True
404+
394405
def seek(self, offset, whence=0):
395406
if whence:
396407
if whence == 1:
@@ -414,8 +425,18 @@ def seek(self, offset, whence=0):
414425
self.read(1024)
415426
self.read(count % 1024)
416427

428+
return self.offset
429+
417430
def readline(self, size=-1):
418431
if size < 0:
432+
# Shortcut common case - newline found in buffer.
433+
offset = self.offset - self.extrastart
434+
i = self.extrabuf.find(b'\n', offset) + 1
435+
if i > 0:
436+
self.extrasize -= i - offset
437+
self.offset += i - offset
438+
return self.extrabuf[offset: i]
439+
419440
size = sys.maxsize
420441
readsize = self.min_readsize
421442
else:
@@ -445,42 +466,6 @@ def readline(self, size=-1):
445466
self.min_readsize = min(readsize, self.min_readsize * 2, 512)
446467
return b''.join(bufs) # Return resulting line
447468

448-
def readlines(self, sizehint=0):
449-
# Negative numbers result in reading all the lines
450-
if sizehint <= 0:
451-
sizehint = sys.maxsize
452-
L = []
453-
while sizehint > 0:
454-
line = self.readline()
455-
if line == b"":
456-
break
457-
L.append(line)
458-
sizehint = sizehint - len(line)
459-
460-
return L
461-
462-
def writelines(self, L):
463-
for line in L:
464-
self.write(line)
465-
466-
def __iter__(self):
467-
return self
468-
469-
def __next__(self):
470-
line = self.readline()
471-
if line:
472-
return line
473-
else:
474-
raise StopIteration
475-
476-
def __enter__(self):
477-
if self.fileobj is None:
478-
raise ValueError("I/O operation on closed GzipFile object")
479-
return self
480-
481-
def __exit__(self, *args):
482-
self.close()
483-
484469

485470
def _test():
486471
# Act like gzip; with -d, act like gunzip.

Lib/test/test_gzip.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import unittest
66
from test import support
77
import os
8+
import io
89
import struct
910
gzip = support.import_module('gzip')
1011

@@ -80,6 +81,16 @@ def test_many_append(self):
8081
zgfile.close()
8182
self.assertEquals(contents, b'a'*201)
8283

84+
def test_buffered_reader(self):
85+
# Issue #7471: a GzipFile can be wrapped in a BufferedReader for
86+
# performance.
87+
self.test_write()
88+
89+
f = gzip.GzipFile(self.filename, 'rb')
90+
with io.BufferedReader(f) as r:
91+
lines = [line for line in r]
92+
93+
self.assertEqual(lines, 50 * data1.splitlines(True))
8394

8495
def test_readline(self):
8596
self.test_write()

Misc/NEWS

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,11 @@ C-API
191191
Library
192192
-------
193193

194-
_ Issue #3972: http.client.HTTPConnection now accepts an optional source_address
194+
- Issue #7471: Improve the performance of GzipFile's buffering mechanism,
195+
and make it implement the `io.BufferedIOBase` ABC to allow for further
196+
speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides.
197+
198+
- Issue #3972: http.client.HTTPConnection now accepts an optional source_address
195199
parameter to allow specifying where your connections come from.
196200

197201
- socket.create_connection now accepts an optional source_address parameter.

0 commit comments

Comments
 (0)