diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 4d1bf2cce1f1e3..13d4e67e586e98 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -4,7 +4,7 @@ import binascii import array import re -from test.support import warnings_helper +from test.support import bigmemtest, _1G, _4G, warnings_helper # Note: "*_hex" functions are aliases for "(un)hexlify" @@ -449,6 +449,14 @@ class BytearrayBinASCIITest(BinASCIITest): class MemoryviewBinASCIITest(BinASCIITest): type2test = memoryview +class ChecksumBigBufferTestCase(unittest.TestCase): + """bpo-38256 - check that inputs >=4 GiB are handled correctly.""" + + @bigmemtest(size=_4G + 4, memuse=1, dry_run=False) + def test_big_buffer(self, size): + data = b"nyan" * (_1G + 1) + self.assertEqual(binascii.crc32(data), 1044521549) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst new file mode 100644 index 00000000000000..d9b57513b0631d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst @@ -0,0 +1,5 @@ +Fix :func:`binascii.crc32` when it is compiled to use zlib'c crc32 to +work properly on inputs 4+GiB in length instead of returning the wrong +result. The workaround prior to this was to always feed the function +data in increments smaller than 4GiB or to just call the zlib module +function. diff --git a/Modules/binascii.c b/Modules/binascii.c index 1f3248b6049b31..3777580a79f2a9 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1120,16 +1120,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/ #ifdef USE_ZLIB_CRC32 -/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */ +/* The same core as zlibmodule.c zlib_crc32_impl. */ { - const Byte *buf; - Py_ssize_t len; - int signed_val; - - buf = (Byte*)data->buf; - len = data->len; - signed_val = crc32(crc, buf, len); - return (unsigned int)signed_val & 0xffffffffU; + unsigned char *buf = data->buf; + Py_ssize_t len = data->len; + + /* Avoid truncation of length for very large buffers. crc32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. */ + while ((size_t)len > UINT_MAX) { + crc = crc32(crc, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } + crc = crc32(crc, buf, (unsigned int)len); + return crc & 0xffffffff; } #else /* USE_ZLIB_CRC32 */ { /* By Jim Ahlstrom; All rights transferred to CNRI */