From f71608ecdf0d1804ee2741d24b6402ead6a0559e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 25 May 2025 13:39:22 +0200 Subject: [PATCH 1/9] expose `zlib.{adler32,crc32}_combine` --- Lib/test/test_zlib.py | 98 +++++++++++++++++++++++++++++ Modules/clinic/zlibmodule.c.h | 112 +++++++++++++++++++++++++++++++++- Modules/zlibmodule.c | 88 ++++++++++++++++++++++++++ 3 files changed, 297 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 4d97fe56f3a094..fa92158747bd11 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -119,6 +119,104 @@ def test_same_as_binascii_crc32(self): self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam')) +class ChecksumCombineMixin: + """Mixin class for testing checksum combination.""" + + N = 1000 + default_iv: int + + def parse_iv(self, iv): + """Parse an IV value. + + - The default IV is returned if *iv* is None. + - A random IV is returned if *iv* is -1. + - Otherwise, *iv* is returned as is. + """ + if iv is None: + return self.default_iv + if iv == -1: + return random.randint(1, 0x80000000) + return iv + + def checksum(self, data, init=None): + """Compute the checksum of data with a given initial value. + + The *init* value is parsed by ``parse_iv``. + """ + iv = self.parse_iv(init) + return self._checksum(data, iv) + + def _checksum(self, data, init): + raise NotImplementedError + + def combine(self, a, b, blen): + """Combine two checksums together.""" + raise NotImplementedError + + def get_random_data(self, data_len, *, iv=None): + """Get a triplet (data, iv, checksum).""" + data = random.randbytes(data_len) + init = self.parse_iv(iv) + checksum = self.checksum(data, init) + return data, init, checksum + + def test_combine_empty(self): + for _ in range(self.N): + a, iv, checksum = self.get_random_data(32, iv=-1) + res = self.combine(iv, self.checksum(a), len(a)) + self.assertEqual(res, checksum) + + def test_combine_no_iv(self): + for _ in range(self.N): + a, _, chk_a = self.get_random_data(32) + b, _, chk_b = self.get_random_data(64) + res = self.combine(chk_a, chk_b, len(b)) + self.assertEqual(res, self.checksum(a + b)) + + def test_combine_with_iv(self): + for _ in range(self.N): + a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1) + chk_a_no_iv = self.checksum(a) + b, iv_b, chk_b_with_iv = self.get_random_data(64, iv=-1) + chk_b_no_iv = self.checksum(b) + + # We can represent c = COMBINE(CHK(a, iv_a), CHK(b, iv_b)) as: + # + # c = CHK(CHK(b'', iv_a) + CHK(a) + CHK(b'', iv_b) + CHK(b)) + # = COMBINE( + # COMBINE(CHK(b'', iv_a), CHK(a)), + # COMBINE(CHK(b'', iv_b), CHK(b)), + # ) + # = COMBINE(COMBINE(iv_a, CHK(a)), COMBINE(iv_b, CHK(b))) + tmp0 = self.combine(iv_a, chk_a_no_iv, len(a)) + tmp1 = self.combine(iv_b, chk_b_no_iv, len(b)) + expected = self.combine(tmp0, tmp1, len(b)) + checksum = self.combine(chk_a_with_iv, chk_b_with_iv, len(b)) + self.assertEqual(checksum, expected) + + +class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase): + + default_iv = 0 + + def _checksum(self, data, init): + return zlib.crc32(data, init) + + def combine(self, a, b, blen): + return zlib.crc32_combine(a, b, blen) + + +class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase): + + default_iv = 1 + + def _checksum(self, data, init): + return zlib.adler32(data, init) + + def combine(self, a, b, blen): + return zlib.adler32_combine(a, b, blen) + + # Issue #10276 - check that inputs >=4 GiB are handled correctly. class ChecksumBigBufferTestCase(unittest.TestCase): diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 2710f65a840db9..f09198236e4099 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -1044,6 +1044,61 @@ zlib_adler32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(zlib_adler32_combine__doc__, +"adler32_combine($module, adler1, adler2, blen, /)\n" +"--\n" +"\n" +"Combine two Adler-32 check values into one.\n" +"\n" +" adler1\n" +" Adler-32 check value for sequence A\n" +" adler2\n" +" Adler-32 check value for sequence B\n" +" blen\n" +" Length of sequence B\n" +"\n" +"Given an Adler-32 check value \'adler1\' of a sequence A and an Adler-32 check\n" +"value \'adler2\' of a sequence B of length \'blen\', the returned checksum\n" +"is the Adler-32 check value of A and B concatenated."); + +#define ZLIB_ADLER32_COMBINE_METHODDEF \ + {"adler32_combine", _PyCFunction_CAST(zlib_adler32_combine), METH_FASTCALL, zlib_adler32_combine__doc__}, + +static unsigned int +zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, + unsigned int adler2, PyObject *blen); + +static PyObject * +zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + unsigned int adler1; + unsigned int adler2; + PyObject *blen; + unsigned int _return_value; + + if (!_PyArg_CheckPositional("adler32_combine", nargs, 3, 3)) { + goto exit; + } + adler1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); + if (adler1 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + adler2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); + if (adler2 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + blen = args[2]; + _return_value = zlib_adler32_combine_impl(module, adler1, adler2, blen); + if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(zlib_crc32__doc__, "crc32($module, data, value=0, /)\n" "--\n" @@ -1098,6 +1153,61 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(zlib_crc32_combine__doc__, +"crc32_combine($module, crc1, crc2, blen, /)\n" +"--\n" +"\n" +"Combine two CRC-32 check values into one.\n" +"\n" +" crc1\n" +" CRC-32 check value for sequence A\n" +" crc2\n" +" CRC-32 check value for sequence B\n" +" blen\n" +" Length of sequence B\n" +"\n" +"Given a CRC-32 check value \'crc1\' of a sequence A and a CRC-32 check\n" +"value \'crc2\' of a sequence B of length \'blen\', the returned checksum\n" +"is the CRC-32 check value of A and B concatenated."); + +#define ZLIB_CRC32_COMBINE_METHODDEF \ + {"crc32_combine", _PyCFunction_CAST(zlib_crc32_combine), METH_FASTCALL, zlib_crc32_combine__doc__}, + +static unsigned int +zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, + unsigned int crc2, PyObject *blen); + +static PyObject * +zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + unsigned int crc1; + unsigned int crc2; + PyObject *blen; + unsigned int _return_value; + + if (!_PyArg_CheckPositional("crc32_combine", nargs, 3, 3)) { + goto exit; + } + crc1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); + if (crc1 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + crc2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); + if (crc2 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + blen = args[2]; + _return_value = zlib_crc32_combine_impl(module, crc1, crc2, blen); + if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + +exit: + return return_value; +} + #ifndef ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ @@ -1121,4 +1231,4 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=33938c7613a8c1c7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=342d8f887bd913e6 input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index d4b4b91697c08e..21aa6eb6a31b87 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -17,6 +17,16 @@ #error "At least zlib version 1.2.2.1 is required" #endif +#if (SIZEOF_OFF_T == SIZEOF_SIZE_T) +# define convert_to_z_off_t PyLong_AsSsize_t +#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG) +# define convert_to_z_off_t PyLong_AsLongLong +#elif (SIZEOF_OFF_T == SIZEOF_LONG) +# define convert_to_z_off_t PyLong_AsLong +#else +# error off_t does not match either size_t, long, or long long! +#endif + // Blocks output buffer wrappers #include "pycore_blocks_output_buffer.h" @@ -1876,6 +1886,44 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value) return PyLong_FromUnsignedLong(value & 0xffffffffU); } +/*[clinic input] +zlib.adler32_combine -> unsigned_int + + adler1: unsigned_int(bitwise=True) + Adler-32 check value for sequence A + + adler2: unsigned_int(bitwise=True) + Adler-32 check value for sequence B + + blen: object + Length of sequence B + / + +Combine two Adler-32 check values into one. + +Given an Adler-32 check value 'adler1' of a sequence A and an Adler-32 check +value 'adler2' of a sequence B of length 'blen', the returned checksum +is the Adler-32 check value of A and B concatenated. +[clinic start generated code]*/ + +static unsigned int +zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, + unsigned int adler2, PyObject *blen) +/*[clinic end generated code: output=57aee1d70f5e2908 input=29005ae6aaa024b3]*/ +{ +#if defined(Z_WANT64) + z_off64_t len = convert_to_z_off_t(blen); +#else + z_off_t len = convert_to_z_off_t(blen); +#endif + if (PyErr_Occurred()) { + return (unsigned int)-1; + } + return adler32_combine(adler1, adler2, len); +} + + + /*[clinic input] zlib.crc32 -> unsigned_int @@ -1923,13 +1971,50 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) return value; } +/*[clinic input] +zlib.crc32_combine -> unsigned_int + + crc1: unsigned_int(bitwise=True) + CRC-32 check value for sequence A + + crc2: unsigned_int(bitwise=True) + CRC-32 check value for sequence B + + blen: object + Length of sequence B + / + +Combine two CRC-32 check values into one. + +Given a CRC-32 check value 'crc1' of a sequence A and a CRC-32 check +value 'crc2' of a sequence B of length 'blen', the returned checksum +is the CRC-32 check value of A and B concatenated. +[clinic start generated code]*/ + +static unsigned int +zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, + unsigned int crc2, PyObject *blen) +/*[clinic end generated code: output=dece978b27e8eada input=4d394ee4d80aa35a]*/ +{ +#if defined(Z_WANT64) + z_off64_t len = convert_to_z_off_t(blen); +#else + z_off_t len = convert_to_z_off_t(blen); +#endif + if (PyErr_Occurred()) { + return (unsigned int)-1; + } + return crc32_combine(crc1, crc2, len); +} static PyMethodDef zlib_methods[] = { ZLIB_ADLER32_METHODDEF + ZLIB_ADLER32_COMBINE_METHODDEF ZLIB_COMPRESS_METHODDEF ZLIB_COMPRESSOBJ_METHODDEF ZLIB_CRC32_METHODDEF + ZLIB_CRC32_COMBINE_METHODDEF ZLIB_DECOMPRESS_METHODDEF ZLIB_DECOMPRESSOBJ_METHODDEF {NULL, NULL} @@ -1981,14 +2066,17 @@ static PyType_Spec ZlibDecompressor_type_spec = { .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), .slots = ZlibDecompressor_type_slots, }; + PyDoc_STRVAR(zlib_module_documentation, "The functions in this module allow compression and decompression using the\n" "zlib library, which is based on GNU zip.\n" "\n" "adler32(string[, start]) -- Compute an Adler-32 checksum.\n" +"adler32_combine(adler1, adler2, len2) -- Combine two Adler-32 checksums.\n" "compress(data[, level]) -- Compress data, with compression level 0-9 or -1.\n" "compressobj([level[, ...]]) -- Return a compressor object.\n" "crc32(string[, start]) -- Compute a CRC-32 checksum.\n" +"crc32_combine(crc1, crc2, len2) -- Combine two CRC-32 checksums.\n" "decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n" "decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n" "\n" From c8a96c81ba7d47281e8251e40b55027d1cc8f009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 25 May 2025 13:50:22 +0200 Subject: [PATCH 2/9] update docs --- Doc/library/zlib.rst | 20 +++++++++++++++++++ Doc/whatsnew/3.15.rst | 10 ++++++++++ ...-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst | 3 +++ 3 files changed, 33 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 75ead3c4cb144c..931ce2be7562bf 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -44,6 +44,16 @@ The available exception and functions in this module are: .. versionchanged:: 3.0 The result is always unsigned. +.. function:: adler32_combine(adler1, adler2, len2, /) + + Combine two Adler-32 checksums into one. + + Given an Adler-32 check value *adler1* of a sequence A and an Adler-32 check + value *adler2* of a sequence B of length *len2*, the returned checksum + is the Adler-32 check value of A and B concatenated. + + .. versionadded:: next + .. function:: compress(data, /, level=-1, wbits=MAX_WBITS) Compresses the bytes in *data*, returning a bytes object containing compressed data. @@ -136,6 +146,16 @@ The available exception and functions in this module are: .. versionchanged:: 3.0 The result is always unsigned. +.. function:: crc32_combine(crc1, crc2, len2, /) + + Combine two CRC-32 checksums into one. + + Given a CRC-32 check value *crc1* of a sequence A and a CRC-32 check + value *crc2* of a sequence B of length *len2*, the returned checksum + is the CRC-32 check value of A and B concatenated. + + .. versionadded:: next + .. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE) Decompresses the bytes in *data*, returning a bytes object containing the diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index bf186c191b04d1..d1452ce1774a05 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -97,6 +97,16 @@ ssl (Contributed by Will Childs-Klein in :gh:`133624`.) +zlib +---- + +* Allow to combine two Adler-32 checksums via :func:`~zlib.adler32_combine`. + (Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.) + +* Allow to combine two CRC-32 checksums via :func:`~zlib.crc32_combine`. + (Contributed by Bénédikt Tran in :gh:`134635`.) + + .. Add improved modules above alphabetically, not here at the end. Optimizations diff --git a/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst new file mode 100644 index 00000000000000..4cabbf2f896917 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst @@ -0,0 +1,3 @@ +:mod:`zlib`: Allow to combine Adler-32 and CRC-32 checksums via +:func:`~zlib.adler32_combine` and :func:`~zlib.crc32_combine`. Patch by +Callum Attryde and Bénédikt Tran. From c67d7badbae83e99221509fbf0600b6ecfb1b220 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 25 May 2025 17:34:11 +0200 Subject: [PATCH 3/9] Address doc review's feedback --- Doc/library/zlib.rst | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 931ce2be7562bf..5471f2c369b27f 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -48,9 +48,13 @@ The available exception and functions in this module are: Combine two Adler-32 checksums into one. - Given an Adler-32 check value *adler1* of a sequence A and an Adler-32 check - value *adler2* of a sequence B of length *len2*, the returned checksum - is the Adler-32 check value of A and B concatenated. + Given the Adler-32 checksum *adler1* of a sequence ``A`` and the + Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*, + return the Adler-32 checksum of ``A`` and ``B`` concatenated. + + This function is typically useful to combine Adler-32 checksums + that were concurrently computed. To compute checksums from a stream + of chunks, use :func:`adler32` sequentially instead. .. versionadded:: next @@ -150,9 +154,13 @@ The available exception and functions in this module are: Combine two CRC-32 checksums into one. - Given a CRC-32 check value *crc1* of a sequence A and a CRC-32 check - value *crc2* of a sequence B of length *len2*, the returned checksum - is the CRC-32 check value of A and B concatenated. + Given the CRC-32 checksum *crc1* of a sequence ``A`` and the + CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*, + return the CRC-32 checksum of ``A`` and ``B`` concatenated. + + This function is typically useful to combine CRC-32 checksums + that were concurrently computed. To compute checksums from a + stream of chunks, use :func:`crc32` sequentially instead. .. versionadded:: next From 05405012b4cfa59b7d166f7989fb62da8edaa0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 25 May 2025 17:35:32 +0200 Subject: [PATCH 4/9] Update What's New Co-authored-by: Emma Smith --- Doc/whatsnew/3.15.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index d1452ce1774a05..cd4b2e8b3dd8ed 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -100,10 +100,10 @@ ssl zlib ---- -* Allow to combine two Adler-32 checksums via :func:`~zlib.adler32_combine`. +* Allow combining two Adler-32 checksums via :func:`~zlib.adler32_combine`. (Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.) -* Allow to combine two CRC-32 checksums via :func:`~zlib.crc32_combine`. +* Allow combining two CRC-32 checksums via :func:`~zlib.crc32_combine`. (Contributed by Bénédikt Tran in :gh:`134635`.) From c5a2c554cb0627fdf804c5987b0ba1a8e0711284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 25 May 2025 17:43:28 +0200 Subject: [PATCH 5/9] add a failing test --- Lib/test/test_zlib.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index fa92158747bd11..52c3c5ecea7366 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -173,6 +173,14 @@ def test_combine_no_iv(self): res = self.combine(chk_a, chk_b, len(b)) self.assertEqual(res, self.checksum(a + b)) + def test_combine_no_iv_invalid_length(self): + a, _, chk_a = self.get_random_data(32) + b, _, chk_b = self.get_random_data(64) + checksum = self.checksum(a + b) + for invalid_len in [1, len(a), 48, len(b) + 1, 191]: + invalid_res = self.combine(chk_a, chk_b, invalid_len) + self.assertNotEqual(invalid_res, checksum) + def test_combine_with_iv(self): for _ in range(self.N): a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1) From cc160a7f28864c6a71930d1977a4ccb572de4c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 11:20:53 +0200 Subject: [PATCH 6/9] update clinic --- Modules/clinic/zlibmodule.c.h | 50 +++++++++++++++++------------------ Modules/zlibmodule.c | 48 ++++++++++++++++----------------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index f09198236e4099..1a76efeceb9414 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -1045,28 +1045,28 @@ zlib_adler32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(zlib_adler32_combine__doc__, -"adler32_combine($module, adler1, adler2, blen, /)\n" +"adler32_combine($module, adler1, adler2, len2, /)\n" "--\n" "\n" -"Combine two Adler-32 check values into one.\n" +"Combine two Adler-32 checksums into one.\n" "\n" " adler1\n" -" Adler-32 check value for sequence A\n" +" Adler-32 checksum for sequence A\n" " adler2\n" -" Adler-32 check value for sequence B\n" -" blen\n" +" Adler-32 checksum for sequence B\n" +" len2\n" " Length of sequence B\n" "\n" -"Given an Adler-32 check value \'adler1\' of a sequence A and an Adler-32 check\n" -"value \'adler2\' of a sequence B of length \'blen\', the returned checksum\n" -"is the Adler-32 check value of A and B concatenated."); +"Given the Adler-32 checksum *adler1* of a sequence ``A`` and the\n" +"Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*,\n" +"return the Adler-32 checksum of ``A`` and ``B`` concatenated."); #define ZLIB_ADLER32_COMBINE_METHODDEF \ {"adler32_combine", _PyCFunction_CAST(zlib_adler32_combine), METH_FASTCALL, zlib_adler32_combine__doc__}, static unsigned int zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, - unsigned int adler2, PyObject *blen); + unsigned int adler2, PyObject *len2); static PyObject * zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) @@ -1074,7 +1074,7 @@ zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyObject *return_value = NULL; unsigned int adler1; unsigned int adler2; - PyObject *blen; + PyObject *len2; unsigned int _return_value; if (!_PyArg_CheckPositional("adler32_combine", nargs, 3, 3)) { @@ -1088,8 +1088,8 @@ zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (adler2 == (unsigned int)-1 && PyErr_Occurred()) { goto exit; } - blen = args[2]; - _return_value = zlib_adler32_combine_impl(module, adler1, adler2, blen); + len2 = args[2]; + _return_value = zlib_adler32_combine_impl(module, adler1, adler2, len2); if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { goto exit; } @@ -1154,28 +1154,28 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(zlib_crc32_combine__doc__, -"crc32_combine($module, crc1, crc2, blen, /)\n" +"crc32_combine($module, crc1, crc2, len2, /)\n" "--\n" "\n" -"Combine two CRC-32 check values into one.\n" +"Combine two CRC-32 checksums into one.\n" "\n" " crc1\n" -" CRC-32 check value for sequence A\n" +" CRC-32 checksum for sequence A\n" " crc2\n" -" CRC-32 check value for sequence B\n" -" blen\n" +" CRC-32 checksum for sequence B\n" +" len2\n" " Length of sequence B\n" "\n" -"Given a CRC-32 check value \'crc1\' of a sequence A and a CRC-32 check\n" -"value \'crc2\' of a sequence B of length \'blen\', the returned checksum\n" -"is the CRC-32 check value of A and B concatenated."); +"Given the CRC-32 checksum *crc1* of a sequence ``A`` and the\n" +"CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*,\n" +"return the CRC-32 checksum of ``A`` and ``B`` concatenated."); #define ZLIB_CRC32_COMBINE_METHODDEF \ {"crc32_combine", _PyCFunction_CAST(zlib_crc32_combine), METH_FASTCALL, zlib_crc32_combine__doc__}, static unsigned int zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, - unsigned int crc2, PyObject *blen); + unsigned int crc2, PyObject *len2); static PyObject * zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) @@ -1183,7 +1183,7 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyObject *return_value = NULL; unsigned int crc1; unsigned int crc2; - PyObject *blen; + PyObject *len2; unsigned int _return_value; if (!_PyArg_CheckPositional("crc32_combine", nargs, 3, 3)) { @@ -1197,8 +1197,8 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (crc2 == (unsigned int)-1 && PyErr_Occurred()) { goto exit; } - blen = args[2]; - _return_value = zlib_crc32_combine_impl(module, crc1, crc2, blen); + len2 = args[2]; + _return_value = zlib_crc32_combine_impl(module, crc1, crc2, len2); if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { goto exit; } @@ -1231,4 +1231,4 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=342d8f887bd913e6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a903453b7d04f755 input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 21aa6eb6a31b87..b4e49116ce6bdd 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1890,31 +1890,31 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value) zlib.adler32_combine -> unsigned_int adler1: unsigned_int(bitwise=True) - Adler-32 check value for sequence A + Adler-32 checksum for sequence A adler2: unsigned_int(bitwise=True) - Adler-32 check value for sequence B + Adler-32 checksum for sequence B - blen: object + len2: object Length of sequence B / -Combine two Adler-32 check values into one. +Combine two Adler-32 checksums into one. -Given an Adler-32 check value 'adler1' of a sequence A and an Adler-32 check -value 'adler2' of a sequence B of length 'blen', the returned checksum -is the Adler-32 check value of A and B concatenated. +Given the Adler-32 checksum *adler1* of a sequence ``A`` and the +Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*, +return the Adler-32 checksum of ``A`` and ``B`` concatenated. [clinic start generated code]*/ static unsigned int zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, - unsigned int adler2, PyObject *blen) -/*[clinic end generated code: output=57aee1d70f5e2908 input=29005ae6aaa024b3]*/ + unsigned int adler2, PyObject *len2) +/*[clinic end generated code: output=61842cefb16afb1b input=8a706b73cbe1a31b]*/ { #if defined(Z_WANT64) - z_off64_t len = convert_to_z_off_t(blen); + z_off64_t len = convert_to_z_off_t(len2); #else - z_off_t len = convert_to_z_off_t(blen); + z_off_t len = convert_to_z_off_t(len2); #endif if (PyErr_Occurred()) { return (unsigned int)-1; @@ -1975,31 +1975,31 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) zlib.crc32_combine -> unsigned_int crc1: unsigned_int(bitwise=True) - CRC-32 check value for sequence A + CRC-32 checksum for sequence A crc2: unsigned_int(bitwise=True) - CRC-32 check value for sequence B + CRC-32 checksum for sequence B - blen: object + len2: object Length of sequence B / -Combine two CRC-32 check values into one. +Combine two CRC-32 checksums into one. -Given a CRC-32 check value 'crc1' of a sequence A and a CRC-32 check -value 'crc2' of a sequence B of length 'blen', the returned checksum -is the CRC-32 check value of A and B concatenated. +Given the CRC-32 checksum *crc1* of a sequence ``A`` and the +CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*, +return the CRC-32 checksum of ``A`` and ``B`` concatenated. [clinic start generated code]*/ static unsigned int zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, - unsigned int crc2, PyObject *blen) -/*[clinic end generated code: output=dece978b27e8eada input=4d394ee4d80aa35a]*/ + unsigned int crc2, PyObject *len2) +/*[clinic end generated code: output=c4def907c602e6eb input=8eb70325fdee010d]*/ { #if defined(Z_WANT64) - z_off64_t len = convert_to_z_off_t(blen); + z_off64_t len = convert_to_z_off_t(len2); #else - z_off_t len = convert_to_z_off_t(blen); + z_off_t len = convert_to_z_off_t(len2); #endif if (PyErr_Occurred()) { return (unsigned int)-1; @@ -2072,11 +2072,11 @@ PyDoc_STRVAR(zlib_module_documentation, "zlib library, which is based on GNU zip.\n" "\n" "adler32(string[, start]) -- Compute an Adler-32 checksum.\n" -"adler32_combine(adler1, adler2, len2) -- Combine two Adler-32 checksums.\n" +"adler32_combine(adler1, adler2, len2, /) -- Combine two Adler-32 checksums.\n" "compress(data[, level]) -- Compress data, with compression level 0-9 or -1.\n" "compressobj([level[, ...]]) -- Return a compressor object.\n" "crc32(string[, start]) -- Compute a CRC-32 checksum.\n" -"crc32_combine(crc1, crc2, len2) -- Combine two CRC-32 checksums.\n" +"crc32_combine(crc1, crc2, len2, /) -- Combine two CRC-32 checksums.\n" "decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n" "decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n" "\n" From f3fca34128b61ef4af3dd76e2dc5dce312a43b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 11:22:07 +0200 Subject: [PATCH 7/9] remove rST syntax in clinic --- Modules/clinic/zlibmodule.c.h | 14 +++++++------- Modules/zlibmodule.c | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 1a76efeceb9414..9898d01a8b9909 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -1057,9 +1057,9 @@ PyDoc_STRVAR(zlib_adler32_combine__doc__, " len2\n" " Length of sequence B\n" "\n" -"Given the Adler-32 checksum *adler1* of a sequence ``A`` and the\n" -"Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*,\n" -"return the Adler-32 checksum of ``A`` and ``B`` concatenated."); +"Given the Adler-32 checksum \'adler1\' of a sequence A and the\n" +"Adler-32 checksum \'adler2\' of a sequence B of length \'len2\',\n" +"return the Adler-32 checksum of A and B concatenated."); #define ZLIB_ADLER32_COMBINE_METHODDEF \ {"adler32_combine", _PyCFunction_CAST(zlib_adler32_combine), METH_FASTCALL, zlib_adler32_combine__doc__}, @@ -1166,9 +1166,9 @@ PyDoc_STRVAR(zlib_crc32_combine__doc__, " len2\n" " Length of sequence B\n" "\n" -"Given the CRC-32 checksum *crc1* of a sequence ``A`` and the\n" -"CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*,\n" -"return the CRC-32 checksum of ``A`` and ``B`` concatenated."); +"Given the CRC-32 checksum \'crc1\' of a sequence A and the\n" +"CRC-32 checksum \'crc2\' of a sequence B of length \'len2\',\n" +"return the CRC-32 checksum of A and B concatenated."); #define ZLIB_CRC32_COMBINE_METHODDEF \ {"crc32_combine", _PyCFunction_CAST(zlib_crc32_combine), METH_FASTCALL, zlib_crc32_combine__doc__}, @@ -1231,4 +1231,4 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=a903453b7d04f755 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ba385aa7547e2c47 input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index b4e49116ce6bdd..0c7e432f873feb 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1901,15 +1901,15 @@ zlib.adler32_combine -> unsigned_int Combine two Adler-32 checksums into one. -Given the Adler-32 checksum *adler1* of a sequence ``A`` and the -Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*, -return the Adler-32 checksum of ``A`` and ``B`` concatenated. +Given the Adler-32 checksum 'adler1' of a sequence A and the +Adler-32 checksum 'adler2' of a sequence B of length 'len2', +return the Adler-32 checksum of A and B concatenated. [clinic start generated code]*/ static unsigned int zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, unsigned int adler2, PyObject *len2) -/*[clinic end generated code: output=61842cefb16afb1b input=8a706b73cbe1a31b]*/ +/*[clinic end generated code: output=61842cefb16afb1b input=6fc6ff7cff42a2f2]*/ { #if defined(Z_WANT64) z_off64_t len = convert_to_z_off_t(len2); @@ -1986,15 +1986,15 @@ zlib.crc32_combine -> unsigned_int Combine two CRC-32 checksums into one. -Given the CRC-32 checksum *crc1* of a sequence ``A`` and the -CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*, -return the CRC-32 checksum of ``A`` and ``B`` concatenated. +Given the CRC-32 checksum 'crc1' of a sequence A and the +CRC-32 checksum 'crc2' of a sequence B of length 'len2', +return the CRC-32 checksum of A and B concatenated. [clinic start generated code]*/ static unsigned int zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, unsigned int crc2, PyObject *len2) -/*[clinic end generated code: output=c4def907c602e6eb input=8eb70325fdee010d]*/ +/*[clinic end generated code: output=c4def907c602e6eb input=322bf6345c88161f]*/ { #if defined(Z_WANT64) z_off64_t len = convert_to_z_off_t(len2); From 2c34e5f48ef97871d84ba40e7bf4feb47668764a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 11:24:12 +0200 Subject: [PATCH 8/9] add type checks --- Lib/test/test_zlib.py | 2 ++ Modules/clinic/zlibmodule.c.h | 10 +++++++++- Modules/zlibmodule.c | 8 ++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 52c3c5ecea7366..c57ab51eca16b4 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -181,6 +181,8 @@ def test_combine_no_iv_invalid_length(self): invalid_res = self.combine(chk_a, chk_b, invalid_len) self.assertNotEqual(invalid_res, checksum) + self.assertRaises(TypeError, self.combine, 0, 0, "len") + def test_combine_with_iv(self): for _ in range(self.N): a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1) diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 9898d01a8b9909..146a7e250019f0 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -1088,6 +1088,10 @@ zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (adler2 == (unsigned int)-1 && PyErr_Occurred()) { goto exit; } + if (!PyLong_Check(args[2])) { + _PyArg_BadArgument("adler32_combine", "argument 3", "int", args[2]); + goto exit; + } len2 = args[2]; _return_value = zlib_adler32_combine_impl(module, adler1, adler2, len2); if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { @@ -1197,6 +1201,10 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (crc2 == (unsigned int)-1 && PyErr_Occurred()) { goto exit; } + if (!PyLong_Check(args[2])) { + _PyArg_BadArgument("crc32_combine", "argument 3", "int", args[2]); + goto exit; + } len2 = args[2]; _return_value = zlib_crc32_combine_impl(module, crc1, crc2, len2); if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { @@ -1231,4 +1239,4 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=ba385aa7547e2c47 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3f7692eb3b5d5a0c input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 0c7e432f873feb..f7009364644b7e 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1895,7 +1895,7 @@ zlib.adler32_combine -> unsigned_int adler2: unsigned_int(bitwise=True) Adler-32 checksum for sequence B - len2: object + len2: object(subclass_of='&PyLong_Type') Length of sequence B / @@ -1909,7 +1909,7 @@ return the Adler-32 checksum of A and B concatenated. static unsigned int zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, unsigned int adler2, PyObject *len2) -/*[clinic end generated code: output=61842cefb16afb1b input=6fc6ff7cff42a2f2]*/ +/*[clinic end generated code: output=61842cefb16afb1b input=51bb045c95130c6f]*/ { #if defined(Z_WANT64) z_off64_t len = convert_to_z_off_t(len2); @@ -1980,7 +1980,7 @@ zlib.crc32_combine -> unsigned_int crc2: unsigned_int(bitwise=True) CRC-32 checksum for sequence B - len2: object + len2: object(subclass_of='&PyLong_Type') Length of sequence B / @@ -1994,7 +1994,7 @@ return the CRC-32 checksum of A and B concatenated. static unsigned int zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, unsigned int crc2, PyObject *len2) -/*[clinic end generated code: output=c4def907c602e6eb input=322bf6345c88161f]*/ +/*[clinic end generated code: output=c4def907c602e6eb input=9c8a065d9040dc66]*/ { #if defined(Z_WANT64) z_off64_t len = convert_to_z_off_t(len2); From 2753b180b85b40e4f274b08fe2baf161f92b9821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 17:11:43 +0200 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Emma Smith --- Doc/library/zlib.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 5471f2c369b27f..7c5e9b086e170d 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -53,8 +53,8 @@ The available exception and functions in this module are: return the Adler-32 checksum of ``A`` and ``B`` concatenated. This function is typically useful to combine Adler-32 checksums - that were concurrently computed. To compute checksums from a stream - of chunks, use :func:`adler32` sequentially instead. + that were concurrently computed. To compute checksums sequentially, use + :func:`adler32` with the running checksum as the ``value`` argument. .. versionadded:: next @@ -159,8 +159,8 @@ The available exception and functions in this module are: return the CRC-32 checksum of ``A`` and ``B`` concatenated. This function is typically useful to combine CRC-32 checksums - that were concurrently computed. To compute checksums from a - stream of chunks, use :func:`crc32` sequentially instead. + that were concurrently computed. To compute checksums sequentially, use + :func:`crc32` with the running checksum as the ``value`` argument. .. versionadded:: next