Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e99e977

Browse files
committed
Issue #25626: Change zlib to accept Py_ssize_t and cap to UINT_MAX
The underlying zlib library stores sizes in “unsigned int”. The corresponding Python parameters are all sizes of buffers filled in by zlib, so it is okay to reduce higher values to the UINT_MAX internal cap. OverflowError is still raised for sizes that do not fit in Py_ssize_t. Sizes are now limited to Py_ssize_t rather than unsigned long, because Python byte strings cannot be larger than Py_ssize_t. Previously this could result in a SystemError on 32-bit platforms. This resolves a regression in the gzip module when reading more than UINT_MAX or LONG_MAX bytes in one call, introduced by revision 62723172412c.
1 parent d13cade commit e99e977

7 files changed

Lines changed: 121 additions & 48 deletions

File tree

Doc/howto/clinic.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,18 +1249,18 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c``
12491249

12501250
/*[python input]
12511251

1252-
class uint_converter(CConverter):
1252+
class capped_uint_converter(CConverter):
12531253
type = 'unsigned int'
1254-
converter = 'uint_converter'
1254+
converter = 'capped_uint_converter'
12551255

12561256
[python start generated code]*/
1257-
/*[python end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
1257+
/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
12581258

1259-
This block adds a converter to Argument Clinic named ``uint``. Parameters
1260-
declared as ``uint`` will be declared as type ``unsigned int``, and will
1261-
be parsed by the ``'O&'`` format unit, which will call the ``uint_converter``
1262-
converter function.
1263-
``uint`` variables automatically support default values.
1259+
This block adds a converter to Argument Clinic named ``capped_uint``. Parameters
1260+
declared as ``capped_uint`` will be declared as type ``unsigned int``, and will
1261+
be parsed by the ``'O&'`` format unit, which will call the
1262+
``capped_uint_converter`` converter function. ``capped_uint`` variables
1263+
automatically support default values.
12641264

12651265
More sophisticated custom converters can insert custom C code to
12661266
handle initialization and cleanup.

Lib/test/support/__init__.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,12 +1608,15 @@ def stop(self):
16081608
def bigmemtest(size, memuse, dry_run=True):
16091609
"""Decorator for bigmem tests.
16101610
1611-
'minsize' is the minimum useful size for the test (in arbitrary,
1612-
test-interpreted units.) 'memuse' is the number of 'bytes per size' for
1613-
the test, or a good estimate of it.
1614-
1615-
if 'dry_run' is False, it means the test doesn't support dummy runs
1616-
when -M is not specified.
1611+
'size' is a requested size for the test (in arbitrary, test-interpreted
1612+
units.) 'memuse' is the number of bytes per unit for the test, or a good
1613+
estimate of it. For example, a test that needs two byte buffers, of 4 GiB
1614+
each, could be decorated with @bigmemtest(size=_4G, memuse=2).
1615+
1616+
The 'size' argument is normally passed to the decorated test method as an
1617+
extra argument. If 'dry_run' is true, the value passed to the test method
1618+
may be less than the requested value. If 'dry_run' is false, it means the
1619+
test doesn't support dummy runs when -M is not specified.
16171620
"""
16181621
def decorator(f):
16191622
def wrapper(self):

Lib/test/test_gzip.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import unittest
55
from test import support
6+
from test.support import bigmemtest, _4G
67
import os
78
import io
89
import struct
@@ -116,6 +117,14 @@ def test_read1(self):
116117
self.assertEqual(f.tell(), nread)
117118
self.assertEqual(b''.join(blocks), data1 * 50)
118119

120+
@bigmemtest(size=_4G, memuse=1)
121+
def test_read_large(self, size):
122+
# Read chunk size over UINT_MAX should be supported, despite zlib's
123+
# limitation per low-level call
124+
compressed = gzip.compress(data1, compresslevel=1)
125+
f = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb')
126+
self.assertEqual(f.read(size), data1)
127+
119128
def test_io_on_closed_object(self):
120129
# Test that I/O operations on closed GzipFile objects raise a
121130
# ValueError, just like the corresponding functions on file objects.

Lib/test/test_zlib.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,17 @@ def test_decompressobj_badflush(self):
122122
self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
123123
self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
124124

125+
@support.cpython_only
126+
def test_overflow(self):
127+
with self.assertRaisesRegex(OverflowError, 'int too large'):
128+
zlib.decompress(b'', 15, sys.maxsize + 1)
129+
with self.assertRaisesRegex(OverflowError, 'int too large'):
130+
zlib.decompressobj().flush(sys.maxsize + 1)
131+
125132

126133
class BaseCompressTestCase(object):
127134
def check_big_compress_buffer(self, size, compress_func):
128135
_1M = 1024 * 1024
129-
fmt = "%%0%dx" % (2 * _1M)
130136
# Generate 10MB worth of random, and expand it by repeating it.
131137
# The assumption is that zlib's memory is not big enough to exploit
132138
# such spread out redundancy.
@@ -196,6 +202,18 @@ def test_length_overflow(self, size):
196202
finally:
197203
data = None
198204

205+
@bigmemtest(size=_4G, memuse=1)
206+
def test_large_bufsize(self, size):
207+
# Test decompress(bufsize) parameter greater than the internal limit
208+
data = HAMLET_SCENE * 10
209+
compressed = zlib.compress(data, 1)
210+
self.assertEqual(zlib.decompress(compressed, 15, size), data)
211+
212+
def test_custom_bufsize(self):
213+
data = HAMLET_SCENE * 10
214+
compressed = zlib.compress(data, 1)
215+
self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
216+
199217

200218
class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
201219
# Test compression object
@@ -364,6 +382,21 @@ def test_maxlenmisc(self):
364382
self.assertRaises(ValueError, dco.decompress, b"", -1)
365383
self.assertEqual(b'', dco.unconsumed_tail)
366384

385+
def test_maxlen_large(self):
386+
# Sizes up to sys.maxsize should be accepted, although zlib is
387+
# internally limited to expressing sizes with unsigned int
388+
data = HAMLET_SCENE * 10
389+
self.assertGreater(len(data), zlib.DEF_BUF_SIZE)
390+
compressed = zlib.compress(data, 1)
391+
dco = zlib.decompressobj()
392+
self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
393+
394+
def test_maxlen_custom(self):
395+
data = HAMLET_SCENE * 10
396+
compressed = zlib.compress(data, 1)
397+
dco = zlib.decompressobj()
398+
self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
399+
367400
def test_clear_unconsumed_tail(self):
368401
# Issue #12050: calling decompress() without providing max_length
369402
# should clear the unconsumed_tail attribute.
@@ -537,6 +570,22 @@ def test_flush_with_freed_input(self):
537570
data = zlib.compress(input2)
538571
self.assertEqual(dco.flush(), input1[1:])
539572

573+
@bigmemtest(size=_4G, memuse=1)
574+
def test_flush_large_length(self, size):
575+
# Test flush(length) parameter greater than internal limit UINT_MAX
576+
input = HAMLET_SCENE * 10
577+
data = zlib.compress(input, 1)
578+
dco = zlib.decompressobj()
579+
dco.decompress(data, 1)
580+
self.assertEqual(dco.flush(size), input[1:])
581+
582+
def test_flush_custom_length(self):
583+
input = HAMLET_SCENE * 10
584+
data = zlib.compress(input, 1)
585+
dco = zlib.decompressobj()
586+
dco.decompress(data, 1)
587+
self.assertEqual(dco.flush(CustomInt()), input[1:])
588+
540589
@requires_Compress_copy
541590
def test_compresscopy(self):
542591
# Test copying a compression object
@@ -725,5 +774,10 @@ def choose_lines(source, number, seed=None, generator=random):
725774
"""
726775

727776

777+
class CustomInt:
778+
def __int__(self):
779+
return 100
780+
781+
728782
if __name__ == "__main__":
729783
unittest.main()

Misc/NEWS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ Core and Builtins
7777
Library
7878
-------
7979

80+
- Issue #25626: Change three zlib functions to accept sizes that fit in
81+
Py_ssize_t, but internally cap those sizes to UINT_MAX. This resolves a
82+
regression in 3.5 where GzipFile.read() failed to read chunks larger than 2
83+
or 4 GiB. The change affects the zlib.Decompress.decompress() max_length
84+
parameter, the zlib.decompress() bufsize parameter, and the
85+
zlib.Decompress.flush() length parameter.
86+
8087
- Issue #25583: Avoid incorrect errors raised by os.makedirs(exist_ok=True)
8188
when the OS gives priority to errors such as EACCES over EEXIST.
8289

Modules/clinic/zlibmodule.c.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ zlib_decompress(PyModuleDef *module, PyObject *args)
6868
unsigned int bufsize = DEF_BUF_SIZE;
6969

7070
if (!PyArg_ParseTuple(args, "y*|iO&:decompress",
71-
&data, &wbits, uint_converter, &bufsize))
71+
&data, &wbits, capped_uint_converter, &bufsize))
7272
goto exit;
7373
return_value = zlib_decompress_impl(module, &data, wbits, bufsize);
7474

@@ -242,7 +242,7 @@ zlib_Decompress_decompress(compobject *self, PyObject *args)
242242
unsigned int max_length = 0;
243243

244244
if (!PyArg_ParseTuple(args, "y*|O&:decompress",
245-
&data, uint_converter, &max_length))
245+
&data, capped_uint_converter, &max_length))
246246
goto exit;
247247
return_value = zlib_Decompress_decompress_impl(self, &data, max_length);
248248

@@ -353,7 +353,7 @@ zlib_Decompress_flush(compobject *self, PyObject *args)
353353
unsigned int length = DEF_BUF_SIZE;
354354

355355
if (!PyArg_ParseTuple(args, "|O&:flush",
356-
uint_converter, &length))
356+
capped_uint_converter, &length))
357357
goto exit;
358358
return_value = zlib_Decompress_flush_impl(self, length);
359359

@@ -438,4 +438,4 @@ zlib_crc32(PyModuleDef *module, PyObject *args)
438438
#ifndef ZLIB_COMPRESS_COPY_METHODDEF
439439
#define ZLIB_COMPRESS_COPY_METHODDEF
440440
#endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
441-
/*[clinic end generated code: output=56ed1147bbbb4788 input=a9049054013a1b77]*/
441+
/*[clinic end generated code: output=7734aec079550bc8 input=a9049054013a1b77]*/

Modules/zlibmodule.c

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -226,42 +226,42 @@ zlib_compress_impl(PyModuleDef *module, Py_buffer *bytes, int level)
226226

227227
/*[python input]
228228
229-
class uint_converter(CConverter):
229+
class capped_uint_converter(CConverter):
230230
type = 'unsigned int'
231-
converter = 'uint_converter'
231+
converter = 'capped_uint_converter'
232232
c_ignored_default = "0"
233233
234234
[python start generated code]*/
235-
/*[python end generated code: output=da39a3ee5e6b4b0d input=22263855f7a3ebfd]*/
235+
/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
236236

237237
static int
238-
uint_converter(PyObject *obj, void *ptr)
238+
capped_uint_converter(PyObject *obj, void *ptr)
239239
{
240-
long val;
241-
unsigned long uval;
240+
PyObject *long_obj;
241+
Py_ssize_t val;
242242

243-
val = PyLong_AsLong(obj);
244-
if (val == -1 && PyErr_Occurred()) {
245-
uval = PyLong_AsUnsignedLong(obj);
246-
if (uval == (unsigned long)-1 && PyErr_Occurred())
247-
return 0;
243+
long_obj = (PyObject *)_PyLong_FromNbInt(obj);
244+
if (long_obj == NULL) {
245+
return 0;
248246
}
249-
else {
250-
if (val < 0) {
251-
PyErr_SetString(PyExc_ValueError,
252-
"value must be positive");
253-
return 0;
254-
}
255-
uval = (unsigned long)val;
247+
val = PyLong_AsSsize_t(long_obj);
248+
Py_DECREF(long_obj);
249+
if (val == -1 && PyErr_Occurred()) {
250+
return 0;
256251
}
257-
258-
if (uval > UINT_MAX) {
259-
PyErr_SetString(PyExc_OverflowError,
260-
"Python int too large for C unsigned int");
252+
if (val < 0) {
253+
PyErr_SetString(PyExc_ValueError,
254+
"value must be positive");
261255
return 0;
262256
}
263257

264-
*(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, unsigned long, unsigned int);
258+
if ((size_t)val > UINT_MAX) {
259+
*(unsigned int *)ptr = UINT_MAX;
260+
}
261+
else {
262+
*(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t,
263+
unsigned int);
264+
}
265265
return 1;
266266
}
267267

@@ -272,7 +272,7 @@ zlib.decompress
272272
Compressed data.
273273
wbits: int(c_default="MAX_WBITS") = MAX_WBITS
274274
The window buffer size.
275-
bufsize: uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
275+
bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
276276
The initial output buffer size.
277277
/
278278
@@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data.
282282
static PyObject *
283283
zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits,
284284
unsigned int bufsize)
285-
/*[clinic end generated code: output=444d0987f3429574 input=0f4b9abb7103f50e]*/
285+
/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/
286286
{
287287
PyObject *result_str = NULL;
288288
Byte *input;
@@ -691,7 +691,7 @@ zlib.Decompress.decompress
691691
692692
data: Py_buffer
693693
The binary data to decompress.
694-
max_length: uint = 0
694+
max_length: capped_uint = 0
695695
The maximum allowable length of the decompressed data.
696696
Unconsumed input data will be stored in
697697
the unconsumed_tail attribute.
@@ -707,7 +707,7 @@ Call the flush() method to clear these buffers.
707707
static PyObject *
708708
zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
709709
unsigned int max_length)
710-
/*[clinic end generated code: output=b82e2a2c19f5fe7b input=02cfc047377cec86]*/
710+
/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/
711711
{
712712
int err;
713713
unsigned int old_length, length = DEF_BUF_SIZE;
@@ -1048,7 +1048,7 @@ zlib_Decompress_copy_impl(compobject *self)
10481048
/*[clinic input]
10491049
zlib.Decompress.flush
10501050
1051-
length: uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
1051+
length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
10521052
the initial size of the output buffer.
10531053
/
10541054
@@ -1057,7 +1057,7 @@ Return a bytes object containing any remaining decompressed data.
10571057

10581058
static PyObject *
10591059
zlib_Decompress_flush_impl(compobject *self, unsigned int length)
1060-
/*[clinic end generated code: output=db6fb753ab698e22 input=1580956505978993]*/
1060+
/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/
10611061
{
10621062
int err;
10631063
unsigned int new_length;

0 commit comments

Comments
 (0)