Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c20c90f

Browse files
authored
Merge pull request #5563 from coolreader18/fix-zlib-tests
Fix a bunch of zlib tests & update gzip.py to Python 3.13
2 parents 4468dcb + aba3d5c commit c20c90f

File tree

5 files changed

+667
-381
lines changed

5 files changed

+667
-381
lines changed

Lib/gzip.py

Lines changed: 68 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,16 @@
1515

1616
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
1717

18-
READ, WRITE = 1, 2
18+
READ = 'rb'
19+
WRITE = 'wb'
1920

2021
_COMPRESS_LEVEL_FAST = 1
2122
_COMPRESS_LEVEL_TRADEOFF = 6
2223
_COMPRESS_LEVEL_BEST = 9
2324

25+
READ_BUFFER_SIZE = 128 * 1024
26+
_WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE
27+
2428

2529
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
2630
encoding=None, errors=None, newline=None):
@@ -118,6 +122,21 @@ class BadGzipFile(OSError):
118122
"""Exception raised in some cases for invalid gzip files."""
119123

120124

125+
class _WriteBufferStream(io.RawIOBase):
126+
"""Minimal object to pass WriteBuffer flushes into GzipFile"""
127+
def __init__(self, gzip_file):
128+
self.gzip_file = gzip_file
129+
130+
def write(self, data):
131+
return self.gzip_file._write_raw(data)
132+
133+
def seekable(self):
134+
return False
135+
136+
def writable(self):
137+
return True
138+
139+
121140
class GzipFile(_compression.BaseStream):
122141
"""The GzipFile class simulates most of the methods of a file object with
123142
the exception of the truncate() method.
@@ -160,9 +179,10 @@ def __init__(self, filename=None, mode=None,
160179
and 9 is slowest and produces the most compression. 0 is no compression
161180
at all. The default is 9.
162181
163-
The mtime argument is an optional numeric timestamp to be written
164-
to the last modification time field in the stream when compressing.
165-
If omitted or None, the current time is used.
182+
The optional mtime argument is the timestamp requested by gzip. The time
183+
is in Unix format, i.e., seconds since 00:00:00 UTC, January 1, 1970.
184+
If mtime is omitted or None, the current time is used. Use mtime = 0
185+
to generate a compressed stream that does not depend on creation time.
166186
167187
"""
168188

@@ -182,6 +202,7 @@ def __init__(self, filename=None, mode=None,
182202
if mode is None:
183203
mode = getattr(fileobj, 'mode', 'rb')
184204

205+
185206
if mode.startswith('r'):
186207
self.mode = READ
187208
raw = _GzipReader(fileobj)
@@ -204,6 +225,9 @@ def __init__(self, filename=None, mode=None,
204225
zlib.DEF_MEM_LEVEL,
205226
0)
206227
self._write_mtime = mtime
228+
self._buffer_size = _WRITE_BUFFER_SIZE
229+
self._buffer = io.BufferedWriter(_WriteBufferStream(self),
230+
buffer_size=self._buffer_size)
207231
else:
208232
raise ValueError("Invalid mode: {!r}".format(mode))
209233

@@ -212,14 +236,6 @@ def __init__(self, filename=None, mode=None,
212236
if self.mode == WRITE:
213237
self._write_gzip_header(compresslevel)
214238

215-
@property
216-
def filename(self):
217-
import warnings
218-
warnings.warn("use the name attribute", DeprecationWarning, 2)
219-
if self.mode == WRITE and self.name[-3:] != ".gz":
220-
return self.name + ".gz"
221-
return self.name
222-
223239
@property
224240
def mtime(self):
225241
"""Last modification time read from stream, or None"""
@@ -237,6 +253,11 @@ def _init_write(self, filename):
237253
self.bufsize = 0
238254
self.offset = 0 # Current file offset for seek(), tell(), etc
239255

256+
def tell(self):
257+
self._check_not_closed()
258+
self._buffer.flush()
259+
return super().tell()
260+
240261
def _write_gzip_header(self, compresslevel):
241262
self.fileobj.write(b'\037\213') # magic header
242263
self.fileobj.write(b'\010') # compression method
@@ -278,6 +299,10 @@ def write(self,data):
278299
if self.fileobj is None:
279300
raise ValueError("write() on closed GzipFile object")
280301

302+
return self._buffer.write(data)
303+
304+
def _write_raw(self, data):
305+
# Called by our self._buffer underlying WriteBufferStream.
281306
if isinstance(data, (bytes, bytearray)):
282307
length = len(data)
283308
else:
@@ -326,18 +351,19 @@ def closed(self):
326351

327352
def close(self):
328353
fileobj = self.fileobj
329-
if fileobj is None:
354+
if fileobj is None or self._buffer.closed:
330355
return
331-
self.fileobj = None
332356
try:
333357
if self.mode == WRITE:
358+
self._buffer.flush()
334359
fileobj.write(self.compress.flush())
335360
write32u(fileobj, self.crc)
336361
# self.size may exceed 2 GiB, or even 4 GiB
337362
write32u(fileobj, self.size & 0xffffffff)
338363
elif self.mode == READ:
339364
self._buffer.close()
340365
finally:
366+
self.fileobj = None
341367
myfileobj = self.myfileobj
342368
if myfileobj:
343369
self.myfileobj = None
@@ -346,6 +372,7 @@ def close(self):
346372
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
347373
self._check_not_closed()
348374
if self.mode == WRITE:
375+
self._buffer.flush()
349376
# Ensure the compressor's buffer is flushed
350377
self.fileobj.write(self.compress.flush(zlib_mode))
351378
self.fileobj.flush()
@@ -376,6 +403,9 @@ def seekable(self):
376403

377404
def seek(self, offset, whence=io.SEEK_SET):
378405
if self.mode == WRITE:
406+
self._check_not_closed()
407+
# Flush buffer to ensure validity of self.offset
408+
self._buffer.flush()
379409
if whence != io.SEEK_SET:
380410
if whence == io.SEEK_CUR:
381411
offset = self.offset + offset
@@ -384,10 +414,10 @@ def seek(self, offset, whence=io.SEEK_SET):
384414
if offset < self.offset:
385415
raise OSError('Negative seek in write mode')
386416
count = offset - self.offset
387-
chunk = b'\0' * 1024
388-
for i in range(count // 1024):
417+
chunk = b'\0' * self._buffer_size
418+
for i in range(count // self._buffer_size):
389419
self.write(chunk)
390-
self.write(b'\0' * (count % 1024))
420+
self.write(b'\0' * (count % self._buffer_size))
391421
elif self.mode == READ:
392422
self._check_not_closed()
393423
return self._buffer.seek(offset, whence)
@@ -454,7 +484,7 @@ def _read_gzip_header(fp):
454484

455485
class _GzipReader(_compression.DecompressReader):
456486
def __init__(self, fp):
457-
super().__init__(_PaddedFile(fp), zlib.decompressobj,
487+
super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
458488
wbits=-zlib.MAX_WBITS)
459489
# Set flag indicating start of a new member
460490
self._new_member = True
@@ -502,12 +532,13 @@ def read(self, size=-1):
502532
self._new_member = False
503533

504534
# Read a chunk of data from the file
505-
buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
535+
if self._decompressor.needs_input:
536+
buf = self._fp.read(READ_BUFFER_SIZE)
537+
uncompress = self._decompressor.decompress(buf, size)
538+
else:
539+
uncompress = self._decompressor.decompress(b"", size)
506540

507-
uncompress = self._decompressor.decompress(buf, size)
508-
if self._decompressor.unconsumed_tail != b"":
509-
self._fp.prepend(self._decompressor.unconsumed_tail)
510-
elif self._decompressor.unused_data != b"":
541+
if self._decompressor.unused_data != b"":
511542
# Prepend the already read bytes to the fileobj so they can
512543
# be seen by _read_eof() and _read_gzip_header()
513544
self._fp.prepend(self._decompressor.unused_data)
@@ -518,14 +549,11 @@ def read(self, size=-1):
518549
raise EOFError("Compressed file ended before the "
519550
"end-of-stream marker was reached")
520551

521-
self._add_read_data( uncompress )
552+
self._crc = zlib.crc32(uncompress, self._crc)
553+
self._stream_size += len(uncompress)
522554
self._pos += len(uncompress)
523555
return uncompress
524556

525-
def _add_read_data(self, data):
526-
self._crc = zlib.crc32(data, self._crc)
527-
self._stream_size = self._stream_size + len(data)
528-
529557
def _read_eof(self):
530558
# We've read to the end of the file
531559
# We check that the computed CRC and size of the
@@ -552,43 +580,21 @@ def _rewind(self):
552580
self._new_member = True
553581

554582

555-
def _create_simple_gzip_header(compresslevel: int,
556-
mtime = None) -> bytes:
557-
"""
558-
Write a simple gzip header with no extra fields.
559-
:param compresslevel: Compresslevel used to determine the xfl bytes.
560-
:param mtime: The mtime (must support conversion to a 32-bit integer).
561-
:return: A bytes object representing the gzip header.
562-
"""
563-
if mtime is None:
564-
mtime = time.time()
565-
if compresslevel == _COMPRESS_LEVEL_BEST:
566-
xfl = 2
567-
elif compresslevel == _COMPRESS_LEVEL_FAST:
568-
xfl = 4
569-
else:
570-
xfl = 0
571-
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
572-
# fields added to header), mtime, xfl and os (255 for unknown OS).
573-
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
574-
575-
576-
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
583+
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0):
577584
"""Compress data in one shot and return the compressed string.
578585
579586
compresslevel sets the compression level in range of 0-9.
580-
mtime can be used to set the modification time. The modification time is
581-
set to the current time by default.
587+
mtime can be used to set the modification time.
588+
The modification time is set to 0 by default, for reproducibility.
582589
"""
583-
if mtime == 0:
584-
# Use zlib as it creates the header with 0 mtime by default.
585-
# This is faster and with less overhead.
586-
return zlib.compress(data, level=compresslevel, wbits=31)
587-
header = _create_simple_gzip_header(compresslevel, mtime)
588-
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
589-
# Wbits=-15 creates a raw deflate block.
590-
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
591-
trailer)
590+
# Wbits=31 automatically includes a gzip header and trailer.
591+
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
592+
if mtime is None:
593+
mtime = time.time()
594+
# Reuse gzip header created by zlib, replace mtime and OS byte for
595+
# consistency.
596+
header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
597+
return header + gzip_data[10:]
592598

593599

594600
def decompress(data):
@@ -655,7 +661,7 @@ def main():
655661
f = builtins.open(arg, "rb")
656662
g = open(arg + ".gz", "wb")
657663
while True:
658-
chunk = f.read(io.DEFAULT_BUFFER_SIZE)
664+
chunk = f.read(READ_BUFFER_SIZE)
659665
if not chunk:
660666
break
661667
g.write(chunk)

0 commit comments

Comments
 (0)