Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Fix a bunch of zlib tests & update gzip.py to Python 3.13 #5563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 68 additions & 62 deletions Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@

# Gzip member header flag bits (RFC 1952 FLG field): text hint, header CRC16,
# extra field present, original filename present, comment present.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

READ, WRITE = 1, 2
# NOTE(review): diff artifact — the numeric mode pair above is the old form
# being replaced by the string modes below in this change.
READ = 'rb'
WRITE = 'wb'

# Named zlib compression-level presets (0-9 scale).
_COMPRESS_LEVEL_FAST = 1
_COMPRESS_LEVEL_TRADEOFF = 6
_COMPRESS_LEVEL_BEST = 9

# Chunk size used when reading compressed input, and the size of the
# io.BufferedWriter wrapped around the write path.
READ_BUFFER_SIZE = 128 * 1024
_WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE


def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
encoding=None, errors=None, newline=None):
Expand Down Expand Up @@ -118,6 +122,21 @@ class BadGzipFile(OSError):
"""Exception raised in some cases for invalid gzip files."""


class _WriteBufferStream(io.RawIOBase):
"""Minimal object to pass WriteBuffer flushes into GzipFile"""
def __init__(self, gzip_file):
self.gzip_file = gzip_file

def write(self, data):
return self.gzip_file._write_raw(data)

def seekable(self):
return False

def writable(self):
return True


class GzipFile(_compression.BaseStream):
"""The GzipFile class simulates most of the methods of a file object with
the exception of the truncate() method.
Expand Down Expand Up @@ -160,9 +179,10 @@ def __init__(self, filename=None, mode=None,
and 9 is slowest and produces the most compression. 0 is no compression
at all. The default is 9.

The mtime argument is an optional numeric timestamp to be written
to the last modification time field in the stream when compressing.
If omitted or None, the current time is used.
The optional mtime argument is the timestamp requested by gzip. The time
is in Unix format, i.e., seconds since 00:00:00 UTC, January 1, 1970.
If mtime is omitted or None, the current time is used. Use mtime = 0
to generate a compressed stream that does not depend on creation time.

"""

Expand All @@ -182,6 +202,7 @@ def __init__(self, filename=None, mode=None,
if mode is None:
mode = getattr(fileobj, 'mode', 'rb')


if mode.startswith('r'):
self.mode = READ
raw = _GzipReader(fileobj)
Expand All @@ -204,6 +225,9 @@ def __init__(self, filename=None, mode=None,
zlib.DEF_MEM_LEVEL,
0)
self._write_mtime = mtime
self._buffer_size = _WRITE_BUFFER_SIZE
self._buffer = io.BufferedWriter(_WriteBufferStream(self),
buffer_size=self._buffer_size)
else:
raise ValueError("Invalid mode: {!r}".format(mode))

Expand All @@ -212,14 +236,6 @@ def __init__(self, filename=None, mode=None,
if self.mode == WRITE:
self._write_gzip_header(compresslevel)

@property
def filename(self):
import warnings
warnings.warn("use the name attribute", DeprecationWarning, 2)
if self.mode == WRITE and self.name[-3:] != ".gz":
return self.name + ".gz"
return self.name

@property
def mtime(self):
"""Last modification time read from stream, or None"""
Expand All @@ -237,6 +253,11 @@ def _init_write(self, filename):
self.bufsize = 0
self.offset = 0 # Current file offset for seek(), tell(), etc

def tell(self):
    """Return the current position in the uncompressed data stream."""
    self._check_not_closed()
    # Flush pending buffered writes first so the base class's offset
    # reflects everything written so far.
    self._buffer.flush()
    return super().tell()

def _write_gzip_header(self, compresslevel):
self.fileobj.write(b'\037\213') # magic header
self.fileobj.write(b'\010') # compression method
Expand Down Expand Up @@ -278,6 +299,10 @@ def write(self,data):
if self.fileobj is None:
raise ValueError("write() on closed GzipFile object")

return self._buffer.write(data)

def _write_raw(self, data):
# Called by our self._buffer underlying WriteBufferStream.
if isinstance(data, (bytes, bytearray)):
length = len(data)
else:
Expand Down Expand Up @@ -326,18 +351,19 @@ def closed(self):

def close(self):
fileobj = self.fileobj
if fileobj is None:
if fileobj is None or self._buffer.closed:
return
self.fileobj = None
try:
if self.mode == WRITE:
self._buffer.flush()
fileobj.write(self.compress.flush())
write32u(fileobj, self.crc)
# self.size may exceed 2 GiB, or even 4 GiB
write32u(fileobj, self.size & 0xffffffff)
elif self.mode == READ:
self._buffer.close()
finally:
self.fileobj = None
myfileobj = self.myfileobj
if myfileobj:
self.myfileobj = None
Expand All @@ -346,6 +372,7 @@ def close(self):
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
    """Flush buffered data through the compressor to the underlying file.

    zlib_mode is forwarded to the compressor's flush() (Z_SYNC_FLUSH by
    default) and controls how much compressor state is emitted.
    """
    self._check_not_closed()
    if self.mode == WRITE:
        # Drain the Python-level write buffer into the compressor first,
        # then flush the compressor and the file object itself.
        self._buffer.flush()
        # Ensure the compressor's buffer is flushed
        self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()
Expand Down Expand Up @@ -376,6 +403,9 @@ def seekable(self):

def seek(self, offset, whence=io.SEEK_SET):
if self.mode == WRITE:
self._check_not_closed()
# Flush buffer to ensure validity of self.offset
self._buffer.flush()
if whence != io.SEEK_SET:
if whence == io.SEEK_CUR:
offset = self.offset + offset
Expand All @@ -384,10 +414,10 @@ def seek(self, offset, whence=io.SEEK_SET):
if offset < self.offset:
raise OSError('Negative seek in write mode')
count = offset - self.offset
chunk = b'\0' * 1024
for i in range(count // 1024):
chunk = b'\0' * self._buffer_size
for i in range(count // self._buffer_size):
self.write(chunk)
self.write(b'\0' * (count % 1024))
self.write(b'\0' * (count % self._buffer_size))
elif self.mode == READ:
self._check_not_closed()
return self._buffer.seek(offset, whence)
Expand Down Expand Up @@ -454,7 +484,7 @@ def _read_gzip_header(fp):

class _GzipReader(_compression.DecompressReader):
def __init__(self, fp):
super().__init__(_PaddedFile(fp), zlib.decompressobj,
super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
wbits=-zlib.MAX_WBITS)
# Set flag indicating start of a new member
self._new_member = True
Expand Down Expand Up @@ -502,12 +532,13 @@ def read(self, size=-1):
self._new_member = False

# Read a chunk of data from the file
buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
if self._decompressor.needs_input:
buf = self._fp.read(READ_BUFFER_SIZE)
uncompress = self._decompressor.decompress(buf, size)
else:
uncompress = self._decompressor.decompress(b"", size)

uncompress = self._decompressor.decompress(buf, size)
if self._decompressor.unconsumed_tail != b"":
self._fp.prepend(self._decompressor.unconsumed_tail)
elif self._decompressor.unused_data != b"":
if self._decompressor.unused_data != b"":
# Prepend the already read bytes to the fileobj so they can
# be seen by _read_eof() and _read_gzip_header()
self._fp.prepend(self._decompressor.unused_data)
Expand All @@ -518,14 +549,11 @@ def read(self, size=-1):
raise EOFError("Compressed file ended before the "
"end-of-stream marker was reached")

self._add_read_data( uncompress )
self._crc = zlib.crc32(uncompress, self._crc)
self._stream_size += len(uncompress)
self._pos += len(uncompress)
return uncompress

def _add_read_data(self, data):
self._crc = zlib.crc32(data, self._crc)
self._stream_size = self._stream_size + len(data)

def _read_eof(self):
# We've read to the end of the file
# We check that the computed CRC and size of the
Expand All @@ -552,43 +580,21 @@ def _rewind(self):
self._new_member = True


def _create_simple_gzip_header(compresslevel: int,
                               mtime = None) -> bytes:
    """Build a minimal gzip member header with no optional fields.

    :param compresslevel: Compression level, used only to pick the XFL byte.
    :param mtime: Modification time (anything convertible to a 32-bit int);
        defaults to the current time when omitted or None.
    :return: The 10-byte gzip header as bytes.
    """
    if mtime is None:
        mtime = time.time()
    # XFL hints at the effort used: 4 = fastest, 2 = maximum compression,
    # 0 = anything in between.
    if compresslevel == _COMPRESS_LEVEL_FAST:
        xfl = 4
    elif compresslevel == _COMPRESS_LEVEL_BEST:
        xfl = 2
    else:
        xfl = 0
    # ID1/ID2 magic, CM=8 (deflate), FLG=0 (no optional fields), MTIME,
    # XFL, and OS=255 (unknown).
    return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)


def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time.
    The modification time is set to 0 by default, for reproducibility.
    """
    # Wbits=31 tells zlib to emit a full gzip header and trailer itself.
    gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
    if mtime is None:
        mtime = time.time()
    # Rebuild the 10-byte header from zlib's output, replacing the MTIME
    # field and forcing the OS byte to 255 (unknown) for consistency.
    # Keep only the first 4 bytes (magic, method, flags) explicitly rather
    # than relying on struct's implicit '4s' truncation of the whole blob.
    header = struct.pack("<4sLBB", gzip_data[:4], int(mtime),
                         gzip_data[8], 255)
    return header + gzip_data[10:]


def decompress(data):
Expand Down Expand Up @@ -655,7 +661,7 @@ def main():
f = builtins.open(arg, "rb")
g = open(arg + ".gz", "wb")
while True:
chunk = f.read(io.DEFAULT_BUFFER_SIZE)
chunk = f.read(READ_BUFFER_SIZE)
if not chunk:
break
g.write(chunk)
Expand Down
Loading
Loading