Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 36befa5

Browse files
committed
Issue #25626: Merge zlib fix from 3.5
2 parents 266b276 + e99e977 commit 36befa5

7 files changed

Lines changed: 121 additions & 48 deletions

File tree

Doc/howto/clinic.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,18 +1249,18 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c``
12491249

12501250
/*[python input]
12511251

1252-
class uint_converter(CConverter):
1252+
class capped_uint_converter(CConverter):
12531253
type = 'unsigned int'
1254-
converter = 'uint_converter'
1254+
converter = 'capped_uint_converter'
12551255

12561256
[python start generated code]*/
1257-
/*[python end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
1257+
/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
12581258

1259-
This block adds a converter to Argument Clinic named ``uint``. Parameters
1260-
declared as ``uint`` will be declared as type ``unsigned int``, and will
1261-
be parsed by the ``'O&'`` format unit, which will call the ``uint_converter``
1262-
converter function.
1263-
``uint`` variables automatically support default values.
1259+
This block adds a converter to Argument Clinic named ``capped_uint``. Parameters
1260+
declared as ``capped_uint`` will be declared as type ``unsigned int``, and will
1261+
be parsed by the ``'O&'`` format unit, which will call the
1262+
``capped_uint_converter`` converter function. ``capped_uint`` variables
1263+
automatically support default values.
12641264

12651265
More sophisticated custom converters can insert custom C code to
12661266
handle initialization and cleanup.

Lib/test/support/__init__.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1610,12 +1610,15 @@ def stop(self):
16101610
def bigmemtest(size, memuse, dry_run=True):
16111611
"""Decorator for bigmem tests.
16121612
1613-
'minsize' is the minimum useful size for the test (in arbitrary,
1614-
test-interpreted units.) 'memuse' is the number of 'bytes per size' for
1615-
the test, or a good estimate of it.
1616-
1617-
if 'dry_run' is False, it means the test doesn't support dummy runs
1618-
when -M is not specified.
1613+
'size' is a requested size for the test (in arbitrary, test-interpreted
1614+
units.) 'memuse' is the number of bytes per unit for the test, or a good
1615+
estimate of it. For example, a test that needs two byte buffers, of 4 GiB
1616+
each, could be decorated with @bigmemtest(size=_4G, memuse=2).
1617+
1618+
The 'size' argument is normally passed to the decorated test method as an
1619+
extra argument. If 'dry_run' is true, the value passed to the test method
1620+
may be less than the requested value. If 'dry_run' is false, it means the
1621+
test doesn't support dummy runs when -M is not specified.
16191622
"""
16201623
def decorator(f):
16211624
def wrapper(self):

Lib/test/test_gzip.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import unittest
55
from test import support
6+
from test.support import bigmemtest, _4G
67
import os
78
import io
89
import struct
@@ -116,6 +117,14 @@ def test_read1(self):
116117
self.assertEqual(f.tell(), nread)
117118
self.assertEqual(b''.join(blocks), data1 * 50)
118119

120+
@bigmemtest(size=_4G, memuse=1)
121+
def test_read_large(self, size):
122+
# Read chunk size over UINT_MAX should be supported, despite zlib's
123+
# limitation per low-level call
124+
compressed = gzip.compress(data1, compresslevel=1)
125+
f = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb')
126+
self.assertEqual(f.read(size), data1)
127+
119128
def test_io_on_closed_object(self):
120129
# Test that I/O operations on closed GzipFile objects raise a
121130
# ValueError, just like the corresponding functions on file objects.

Lib/test/test_zlib.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,17 @@ def test_decompressobj_badflush(self):
122122
self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
123123
self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
124124

125+
@support.cpython_only
126+
def test_overflow(self):
127+
with self.assertRaisesRegex(OverflowError, 'int too large'):
128+
zlib.decompress(b'', 15, sys.maxsize + 1)
129+
with self.assertRaisesRegex(OverflowError, 'int too large'):
130+
zlib.decompressobj().flush(sys.maxsize + 1)
131+
125132

126133
class BaseCompressTestCase(object):
127134
def check_big_compress_buffer(self, size, compress_func):
128135
_1M = 1024 * 1024
129-
fmt = "%%0%dx" % (2 * _1M)
130136
# Generate 10MB worth of random, and expand it by repeating it.
131137
# The assumption is that zlib's memory is not big enough to exploit
132138
# such spread out redundancy.
@@ -196,6 +202,18 @@ def test_length_overflow(self, size):
196202
finally:
197203
data = None
198204

205+
@bigmemtest(size=_4G, memuse=1)
206+
def test_large_bufsize(self, size):
207+
# Test decompress(bufsize) parameter greater than the internal limit
208+
data = HAMLET_SCENE * 10
209+
compressed = zlib.compress(data, 1)
210+
self.assertEqual(zlib.decompress(compressed, 15, size), data)
211+
212+
def test_custom_bufsize(self):
213+
data = HAMLET_SCENE * 10
214+
compressed = zlib.compress(data, 1)
215+
self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
216+
199217

200218
class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
201219
# Test compression object
@@ -364,6 +382,21 @@ def test_maxlenmisc(self):
364382
self.assertRaises(ValueError, dco.decompress, b"", -1)
365383
self.assertEqual(b'', dco.unconsumed_tail)
366384

385+
def test_maxlen_large(self):
386+
# Sizes up to sys.maxsize should be accepted, although zlib is
387+
# internally limited to expressing sizes with unsigned int
388+
data = HAMLET_SCENE * 10
389+
self.assertGreater(len(data), zlib.DEF_BUF_SIZE)
390+
compressed = zlib.compress(data, 1)
391+
dco = zlib.decompressobj()
392+
self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
393+
394+
def test_maxlen_custom(self):
395+
data = HAMLET_SCENE * 10
396+
compressed = zlib.compress(data, 1)
397+
dco = zlib.decompressobj()
398+
self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
399+
367400
def test_clear_unconsumed_tail(self):
368401
# Issue #12050: calling decompress() without providing max_length
369402
# should clear the unconsumed_tail attribute.
@@ -537,6 +570,22 @@ def test_flush_with_freed_input(self):
537570
data = zlib.compress(input2)
538571
self.assertEqual(dco.flush(), input1[1:])
539572

573+
@bigmemtest(size=_4G, memuse=1)
574+
def test_flush_large_length(self, size):
575+
# Test flush(length) parameter greater than internal limit UINT_MAX
576+
input = HAMLET_SCENE * 10
577+
data = zlib.compress(input, 1)
578+
dco = zlib.decompressobj()
579+
dco.decompress(data, 1)
580+
self.assertEqual(dco.flush(size), input[1:])
581+
582+
def test_flush_custom_length(self):
583+
input = HAMLET_SCENE * 10
584+
data = zlib.compress(input, 1)
585+
dco = zlib.decompressobj()
586+
dco.decompress(data, 1)
587+
self.assertEqual(dco.flush(CustomInt()), input[1:])
588+
540589
@requires_Compress_copy
541590
def test_compresscopy(self):
542591
# Test copying a compression object
@@ -725,5 +774,10 @@ def choose_lines(source, number, seed=None, generator=random):
725774
"""
726775

727776

777+
class CustomInt:
778+
def __int__(self):
779+
return 100
780+
781+
728782
if __name__ == "__main__":
729783
unittest.main()

Misc/NEWS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,13 @@ Core and Builtins
432432
Library
433433
-------
434434

435+
- Issue #25626: Change three zlib functions to accept sizes that fit in
436+
Py_ssize_t, but internally cap those sizes to UINT_MAX. This resolves a
437+
regression in 3.5 where GzipFile.read() failed to read chunks larger than 2
438+
or 4 GiB. The change affects the zlib.Decompress.decompress() max_length
439+
parameter, the zlib.decompress() bufsize parameter, and the
440+
zlib.Decompress.flush() length parameter.
441+
435442
- Issue #25583: Avoid incorrect errors raised by os.makedirs(exist_ok=True)
436443
when the OS gives priority to errors such as EACCES over EEXIST.
437444

Modules/clinic/zlibmodule.c.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ zlib_decompress(PyModuleDef *module, PyObject *args)
6868
unsigned int bufsize = DEF_BUF_SIZE;
6969

7070
if (!PyArg_ParseTuple(args, "y*|iO&:decompress",
71-
&data, &wbits, uint_converter, &bufsize))
71+
&data, &wbits, capped_uint_converter, &bufsize))
7272
goto exit;
7373
return_value = zlib_decompress_impl(module, &data, wbits, bufsize);
7474

@@ -242,7 +242,7 @@ zlib_Decompress_decompress(compobject *self, PyObject *args)
242242
unsigned int max_length = 0;
243243

244244
if (!PyArg_ParseTuple(args, "y*|O&:decompress",
245-
&data, uint_converter, &max_length))
245+
&data, capped_uint_converter, &max_length))
246246
goto exit;
247247
return_value = zlib_Decompress_decompress_impl(self, &data, max_length);
248248

@@ -353,7 +353,7 @@ zlib_Decompress_flush(compobject *self, PyObject *args)
353353
unsigned int length = DEF_BUF_SIZE;
354354

355355
if (!PyArg_ParseTuple(args, "|O&:flush",
356-
uint_converter, &length))
356+
capped_uint_converter, &length))
357357
goto exit;
358358
return_value = zlib_Decompress_flush_impl(self, length);
359359

@@ -438,4 +438,4 @@ zlib_crc32(PyModuleDef *module, PyObject *args)
438438
#ifndef ZLIB_COMPRESS_COPY_METHODDEF
439439
#define ZLIB_COMPRESS_COPY_METHODDEF
440440
#endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
441-
/*[clinic end generated code: output=56ed1147bbbb4788 input=a9049054013a1b77]*/
441+
/*[clinic end generated code: output=7734aec079550bc8 input=a9049054013a1b77]*/

Modules/zlibmodule.c

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -226,42 +226,42 @@ zlib_compress_impl(PyModuleDef *module, Py_buffer *bytes, int level)
226226

227227
/*[python input]
228228
229-
class uint_converter(CConverter):
229+
class capped_uint_converter(CConverter):
230230
type = 'unsigned int'
231-
converter = 'uint_converter'
231+
converter = 'capped_uint_converter'
232232
c_ignored_default = "0"
233233
234234
[python start generated code]*/
235-
/*[python end generated code: output=da39a3ee5e6b4b0d input=22263855f7a3ebfd]*/
235+
/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
236236

237237
static int
238-
uint_converter(PyObject *obj, void *ptr)
238+
capped_uint_converter(PyObject *obj, void *ptr)
239239
{
240-
long val;
241-
unsigned long uval;
240+
PyObject *long_obj;
241+
Py_ssize_t val;
242242

243-
val = PyLong_AsLong(obj);
244-
if (val == -1 && PyErr_Occurred()) {
245-
uval = PyLong_AsUnsignedLong(obj);
246-
if (uval == (unsigned long)-1 && PyErr_Occurred())
247-
return 0;
243+
long_obj = (PyObject *)_PyLong_FromNbInt(obj);
244+
if (long_obj == NULL) {
245+
return 0;
248246
}
249-
else {
250-
if (val < 0) {
251-
PyErr_SetString(PyExc_ValueError,
252-
"value must be positive");
253-
return 0;
254-
}
255-
uval = (unsigned long)val;
247+
val = PyLong_AsSsize_t(long_obj);
248+
Py_DECREF(long_obj);
249+
if (val == -1 && PyErr_Occurred()) {
250+
return 0;
256251
}
257-
258-
if (uval > UINT_MAX) {
259-
PyErr_SetString(PyExc_OverflowError,
260-
"Python int too large for C unsigned int");
252+
if (val < 0) {
253+
PyErr_SetString(PyExc_ValueError,
254+
"value must be positive");
261255
return 0;
262256
}
263257

264-
*(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, unsigned long, unsigned int);
258+
if ((size_t)val > UINT_MAX) {
259+
*(unsigned int *)ptr = UINT_MAX;
260+
}
261+
else {
262+
*(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t,
263+
unsigned int);
264+
}
265265
return 1;
266266
}
267267

@@ -272,7 +272,7 @@ zlib.decompress
272272
Compressed data.
273273
wbits: int(c_default="MAX_WBITS") = MAX_WBITS
274274
The window buffer size.
275-
bufsize: uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
275+
bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
276276
The initial output buffer size.
277277
/
278278
@@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data.
282282
static PyObject *
283283
zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits,
284284
unsigned int bufsize)
285-
/*[clinic end generated code: output=444d0987f3429574 input=0f4b9abb7103f50e]*/
285+
/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/
286286
{
287287
PyObject *result_str = NULL;
288288
Byte *input;
@@ -691,7 +691,7 @@ zlib.Decompress.decompress
691691
692692
data: Py_buffer
693693
The binary data to decompress.
694-
max_length: uint = 0
694+
max_length: capped_uint = 0
695695
The maximum allowable length of the decompressed data.
696696
Unconsumed input data will be stored in
697697
the unconsumed_tail attribute.
@@ -707,7 +707,7 @@ Call the flush() method to clear these buffers.
707707
static PyObject *
708708
zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
709709
unsigned int max_length)
710-
/*[clinic end generated code: output=b82e2a2c19f5fe7b input=02cfc047377cec86]*/
710+
/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/
711711
{
712712
int err;
713713
unsigned int old_length, length = DEF_BUF_SIZE;
@@ -1048,7 +1048,7 @@ zlib_Decompress_copy_impl(compobject *self)
10481048
/*[clinic input]
10491049
zlib.Decompress.flush
10501050
1051-
length: uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
1051+
length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
10521052
the initial size of the output buffer.
10531053
/
10541054
@@ -1057,7 +1057,7 @@ Return a bytes object containing any remaining decompressed data.
10571057

10581058
static PyObject *
10591059
zlib_Decompress_flush_impl(compobject *self, unsigned int length)
1060-
/*[clinic end generated code: output=db6fb753ab698e22 input=1580956505978993]*/
1060+
/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/
10611061
{
10621062
int err;
10631063
unsigned int new_length;

0 commit comments

Comments
 (0)