From 63d5d188683d403882cade64590a52eb3dd3dc74 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 20 Mar 2022 13:28:36 -0700 Subject: [PATCH] [3.10] bpo-38256: Fix binascii.crc32 large input. Inputs >= 4GiB to `binascii.crc32(...)` when compiled to use the zlib crc32 implementation (the norm on POSIX) no longer return the wrong result. --- Lib/test/test_binascii.py | 10 ++++++++- .../2022-03-19-15-54-41.bpo-38256.FoMbjE.rst | 5 +++++ Modules/binascii.c | 22 +++++++++++-------- 3 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 4d1bf2cce1f1e3..13d4e67e586e98 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -4,7 +4,7 @@ import binascii import array import re -from test.support import warnings_helper +from test.support import bigmemtest, _1G, _4G, warnings_helper # Note: "*_hex" functions are aliases for "(un)hexlify" @@ -449,6 +449,14 @@ class BytearrayBinASCIITest(BinASCIITest): class MemoryviewBinASCIITest(BinASCIITest): type2test = memoryview +class ChecksumBigBufferTestCase(unittest.TestCase): + """bpo-38256 - check that inputs >=4 GiB are handled correctly.""" + + @bigmemtest(size=_4G + 4, memuse=1, dry_run=False) + def test_big_buffer(self, size): + data = b"nyan" * (_1G + 1) + self.assertEqual(binascii.crc32(data), 1044521549) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst new file mode 100644 index 00000000000000..d9b57513b0631d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst @@ -0,0 +1,5 @@ +Fix :func:`binascii.crc32` when it is compiled to use zlib's crc32 to +work properly on inputs 4+GiB in length instead of returning the wrong +result. 
The workaround prior to this was to always feed the function +data in increments smaller than 4GiB or to just call the zlib module +function. diff --git a/Modules/binascii.c b/Modules/binascii.c index 1f3248b6049b31..3777580a79f2a9 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1120,16 +1120,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/ #ifdef USE_ZLIB_CRC32 -/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */ +/* The same core as zlibmodule.c zlib_crc32_impl. */ { - const Byte *buf; - Py_ssize_t len; - int signed_val; - - buf = (Byte*)data->buf; - len = data->len; - signed_val = crc32(crc, buf, len); - return (unsigned int)signed_val & 0xffffffffU; + unsigned char *buf = data->buf; + Py_ssize_t len = data->len; + + /* Avoid truncation of length for very large buffers. crc32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. */ + while ((size_t)len > UINT_MAX) { + crc = crc32(crc, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } + crc = crc32(crc, buf, (unsigned int)len); + return crc & 0xffffffff; } #else /* USE_ZLIB_CRC32 */ { /* By Jim Ahlstrom; All rights transferred to CNRI */