From aa812f8b3c5bbcdd34117d0dacf48e6785dc6bf1 Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Thu, 17 Mar 2022 13:25:08 +0800 Subject: [PATCH 1/8] remove invalid document of checksum functions Since CPython 3.0.0, the checksums are always truncated to `unsigned int`. --- Doc/library/binascii.rst | 7 +------ Doc/library/zlib.rst | 10 ---------- Lib/test/test_binascii.py | 10 ++++++++++ Lib/test/test_zlib.py | 11 +++++++++++ .../2022-03-17-13-35-28.bpo-47040.4Dn48U.rst | 2 ++ Modules/zlibmodule.c | 8 +++----- 6 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 62d7efe34ab364..e9ad1d2d8f4a18 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -107,7 +107,7 @@ The :mod:`binascii` module defines the following functions: .. function:: crc32(data[, value]) - Compute CRC-32, the 32-bit checksum of *data*, starting with an + Compute CRC-32, the unsigned 32-bit checksum of *data*, starting with an initial CRC of *value*. The default initial CRC is zero. The algorithm is consistent with the ZIP file checksum. Since the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash @@ -119,11 +119,6 @@ The :mod:`binascii` module defines the following functions: crc = binascii.crc32(b" world", crc) print('crc32 = {:#010x}'.format(crc)) - .. versionchanged:: 3.0 - The result is always unsigned. - To generate the same numeric value across all Python versions and - platforms, use ``crc32(data) & 0xffffffff``. - .. 
function:: b2a_hex(data[, sep[, bytes_per_sep=1]]) hexlify(data[, sep[, bytes_per_sep=1]]) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 793c90f3c4e7a4..6c17f472229e1f 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -41,11 +41,6 @@ The available exception and functions in this module are: the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash algorithm. - .. versionchanged:: 3.0 - Always returns an unsigned value. - To generate the same numeric value across all Python versions and - platforms, use ``adler32(data) & 0xffffffff``. - .. function:: compress(data, /, level=-1, wbits=MAX_WBITS) @@ -136,11 +131,6 @@ The available exception and functions in this module are: the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash algorithm. - .. versionchanged:: 3.0 - Always returns an unsigned value. - To generate the same numeric value across all Python versions and - platforms, use ``crc32(data) & 0xffffffff``. - .. 
function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index b5aa847b943e69..e03722a5c4e6c6 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -3,6 +3,7 @@ import unittest import binascii import array +import random import re from test.support import warnings_helper @@ -241,6 +242,15 @@ def test_crc32(self): self.assertRaises(TypeError, binascii.crc32) + def test_random_crc32(self): + dat = random.randbytes(1234) + UINT_MAX = 0xFFFF_FFFF + + self.assertTrue(0 <= binascii.crc32(dat) <= UINT_MAX) + + self.assertEqual(binascii.crc32(dat, UINT_MAX+123), + binascii.crc32(dat, (UINT_MAX+123) & UINT_MAX)) + def test_hex(self): # test hexlification s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index f20aad051da960..f1ee454af14831 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -104,6 +104,17 @@ def test_same_as_binascii_crc32(self): self.assertEqual(zlib.crc32(foo), crc) self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam')) + def test_random_checksum(self): + dat = random.randbytes(1234) + UINT_MAX = 0xFFFF_FFFF + + self.assertTrue(0 <= zlib.adler32(dat) <= UINT_MAX) + self.assertTrue(0 <= zlib.crc32(dat) <= UINT_MAX) + + self.assertEqual(zlib.adler32(dat, UINT_MAX+123), + zlib.adler32(dat, (UINT_MAX+123) & UINT_MAX)) + self.assertEqual(zlib.crc32(dat, UINT_MAX+123), + zlib.crc32(dat, (UINT_MAX+123) & UINT_MAX)) # Issue #10276 - check that inputs >=4 GiB are handled correctly. 
class ChecksumBigBufferTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst b/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst new file mode 100644 index 00000000000000..fb1f7ec72328f6 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst @@ -0,0 +1,2 @@ +Remove invalid document, the results of :func:`zlib.adler32` / +:func:`zlib.crc32` / :func:`binascii.crc32` don't need ``& 0xFFFF_FFFF``. diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index f9646568d7e01d..4cf1b6eeba2f7e 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1436,8 +1436,6 @@ static PyObject * zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) /*[clinic end generated code: output=63499fa20af7ea25 input=26c3ed430fa00b4c]*/ { - int signed_val; - /* Releasing the GIL for very small buffers is inefficient and may lower performance */ if (data->len > 1024*5) { @@ -1452,12 +1450,12 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) buf += (size_t) UINT_MAX; len -= (size_t) UINT_MAX; } - signed_val = crc32(value, buf, (unsigned int)len); + value = crc32(value, buf, (unsigned int)len); Py_END_ALLOW_THREADS } else { - signed_val = crc32(value, data->buf, (unsigned int)data->len); + value = crc32(value, data->buf, (unsigned int)data->len); } - return PyLong_FromUnsignedLong(signed_val & 0xffffffffU); + return PyLong_FromUnsignedLong(value & 0xffffffffU); } From 03ec8273574f3e8929d67ddecfac3e8fd7be2d9c Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 17 Mar 2022 17:51:24 -0700 Subject: [PATCH 2/8] reworded. 
--- .../Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst b/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst index fb1f7ec72328f6..73da25cb19f77c 100644 --- a/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst +++ b/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst @@ -1,2 +1,3 @@ -Remove invalid document, the results of :func:`zlib.adler32` / -:func:`zlib.crc32` / :func:`binascii.crc32` don't need ``& 0xFFFF_FFFF``. +Internal cleanup to :func:`zlib.crc32` / :func:`binascii.crc32` to not use +an intermediate signed value. No functional change. Clarified the old Python +versions compatiblity note in the docstrings. From c123b56f96f28d06acc18fbc7ccfbf602b58d024 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 18 Mar 2022 10:57:38 -0700 Subject: [PATCH 3/8] update versionchanged text to be more explicit. --- Doc/library/binascii.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index e9ad1d2d8f4a18..57011a025b5115 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -119,6 +119,10 @@ The :mod:`binascii` module defines the following functions: crc = binascii.crc32(b" world", crc) print('crc32 = {:#010x}'.format(crc)) +.. versionchanged:: 3.0 + The result is always unsigned. + To generate the same numeric value when using Python 2 or earlier, + use ``crc32(data) & 0xffffffff``. .. function:: b2a_hex(data[, sep[, bytes_per_sep=1]]) hexlify(data[, sep[, bytes_per_sep=1]]) From 2e70760ac9f493834d6b0f542c759b71f854c818 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Fri, 18 Mar 2022 10:58:52 -0700 Subject: [PATCH 4/8] update the versionchanged text to be more explicit --- Doc/library/zlib.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 6c17f472229e1f..0fb38390df0a68 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -41,6 +41,10 @@ The available exception and functions in this module are: the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash algorithm. +.. versionchanged:: 3.0 + The result is always unsigned. + To generate the same numeric value when using Python 2 or earlier, + use ``adler32(data) & 0xffffffff``. .. function:: compress(data, /, level=-1, wbits=MAX_WBITS) @@ -131,6 +135,10 @@ The available exception and functions in this module are: the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash algorithm. +.. versionchanged:: 3.0 + The result is always unsigned. + To generate the same numeric value when using Python 2 or earlier, + use ``crc32(data) & 0xffffffff``. .. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE) From 3bdee1288eb03ce1e21f2f047d94da0995d77f09 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 18 Mar 2022 11:10:53 -0700 Subject: [PATCH 5/8] indentation --- Doc/library/binascii.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 57011a025b5115..19efc2df9483db 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -119,10 +119,10 @@ The :mod:`binascii` module defines the following functions: crc = binascii.crc32(b" world", crc) print('crc32 = {:#010x}'.format(crc)) -.. versionchanged:: 3.0 - The result is always unsigned. - To generate the same numeric value when using Python 2 or earlier, - use ``crc32(data) & 0xffffffff``. + .. versionchanged:: 3.0 + The result is always unsigned. 
+ To generate the same numeric value when using Python 2 or earlier, + use ``crc32(data) & 0xffffffff``. .. function:: b2a_hex(data[, sep[, bytes_per_sep=1]]) hexlify(data[, sep[, bytes_per_sep=1]]) From 10e83face1fdb4fa9c1dfde15c4d23ec4f924775 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 18 Mar 2022 11:11:24 -0700 Subject: [PATCH 6/8] indentation --- Doc/library/zlib.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 0fb38390df0a68..f0c67d5ae2584a 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -41,10 +41,10 @@ The available exception and functions in this module are: the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash algorithm. -.. versionchanged:: 3.0 - The result is always unsigned. - To generate the same numeric value when using Python 2 or earlier, - use ``adler32(data) & 0xffffffff``. + .. versionchanged:: 3.0 + The result is always unsigned. + To generate the same numeric value when using Python 2 or earlier, + use ``adler32(data) & 0xffffffff``. .. function:: compress(data, /, level=-1, wbits=MAX_WBITS) @@ -135,10 +135,10 @@ The available exception and functions in this module are: the algorithm is designed for use as a checksum algorithm, it is not suitable for use as a general hash algorithm. -.. versionchanged:: 3.0 - The result is always unsigned. - To generate the same numeric value when using Python 2 or earlier, - use ``crc32(data) & 0xffffffff``. + .. versionchanged:: 3.0 + The result is always unsigned. + To generate the same numeric value when using Python 2 or earlier, + use ``crc32(data) & 0xffffffff``. .. 
function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE) From 7db926d37d29e2b15782d90b79e557b1193f6dbe Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Sat, 19 Mar 2022 11:24:45 +0800 Subject: [PATCH 7/8] remove unit-tests --- Lib/test/test_binascii.py | 10 ---------- Lib/test/test_zlib.py | 11 ----------- 2 files changed, 21 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index e03722a5c4e6c6..b5aa847b943e69 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -3,7 +3,6 @@ import unittest import binascii import array -import random import re from test.support import warnings_helper @@ -242,15 +241,6 @@ def test_crc32(self): self.assertRaises(TypeError, binascii.crc32) - def test_random_crc32(self): - dat = random.randbytes(1234) - UINT_MAX = 0xFFFF_FFFF - - self.assertTrue(0 <= binascii.crc32(dat) <= UINT_MAX) - - self.assertEqual(binascii.crc32(dat, UINT_MAX+123), - binascii.crc32(dat, (UINT_MAX+123) & UINT_MAX)) - def test_hex(self): # test hexlification s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index f1ee454af14831..f20aad051da960 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -104,17 +104,6 @@ def test_same_as_binascii_crc32(self): self.assertEqual(zlib.crc32(foo), crc) self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam')) - def test_random_checksum(self): - dat = random.randbytes(1234) - UINT_MAX = 0xFFFF_FFFF - - self.assertTrue(0 <= zlib.adler32(dat) <= UINT_MAX) - self.assertTrue(0 <= zlib.crc32(dat) <= UINT_MAX) - - self.assertEqual(zlib.adler32(dat, UINT_MAX+123), - zlib.adler32(dat, (UINT_MAX+123) & UINT_MAX)) - self.assertEqual(zlib.crc32(dat, UINT_MAX+123), - zlib.crc32(dat, (UINT_MAX+123) & UINT_MAX)) # Issue #10276 - check that inputs >=4 GiB are handled correctly. 
class ChecksumBigBufferTestCase(unittest.TestCase): From 78a7940d4689843bdc2e830146a2348973a5f405 Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Sat, 19 Mar 2022 11:29:54 +0800 Subject: [PATCH 8/8] improve NEWS --- .../Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst b/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst index 73da25cb19f77c..e977fb5f59fbc9 100644 --- a/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst +++ b/Misc/NEWS.d/next/Documentation/2022-03-17-13-35-28.bpo-47040.4Dn48U.rst @@ -1,3 +1,2 @@ -Internal cleanup to :func:`zlib.crc32` / :func:`binascii.crc32` to not use -an intermediate signed value. No functional change. Clarified the old Python -versions compatiblity note in the docstrings. +Clarified the old Python versions compatibility note of :func:`binascii.crc32` / +:func:`zlib.adler32` / :func:`zlib.crc32` functions.