Thanks to visit codestin.com
Credit goes to github.com

Skip to content

bpo-38256: Fix binascii.crc32() when inputs are 4+GiB #32000

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Lib/test/test_binascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import binascii
import array
import re
from test.support import warnings_helper
from test.support import bigmemtest, _1G, _4G, warnings_helper


# Note: "*_hex" functions are aliases for "(un)hexlify"
Expand Down Expand Up @@ -441,6 +441,14 @@ class BytearrayBinASCIITest(BinASCIITest):
class MemoryviewBinASCIITest(BinASCIITest):
type2test = memoryview

class ChecksumBigBufferTestCase(unittest.TestCase):
    """bpo-38256 - inputs of 4 GiB or more must produce the right checksum."""

    @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
    def test_big_buffer(self, size):
        # Build the oversized input by repeating a 4-byte pattern
        # (_1G + 1) times, then compare its CRC-32 against the known value.
        repeats = _1G + 1
        payload = b"nyan" * repeats
        expected_crc = 1044521549
        self.assertEqual(binascii.crc32(payload), expected_crc)


if __name__ == "__main__":
unittest.main()
14 changes: 14 additions & 0 deletions Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Fix :func:`binascii.crc32` when it is compiled to use zlib's crc32 to
work properly on inputs 4+GiB in length instead of returning the wrong
result. The workaround prior to this was to always feed the function
data in increments smaller than 4GiB or to just call the zlib module
function.

We also have :func:`binascii.crc32` release the GIL when computing
on larger inputs as :func:`zlib.crc32` and :mod:`hashlib` do.

This also boosts performance on Windows as it now uses the zlib crc32
implementation for :func:`binascii.crc32` for a 2-3x speedup.

That the stdlib has a crc32 API in two modules is a known historical
oddity. This moves us closer to a single implementation behind them.
75 changes: 51 additions & 24 deletions Modules/binascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,21 @@ static const unsigned int crc_32_tab[256] = {
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};

static unsigned int
internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
{ /* By Jim Ahlstrom; All rights transferred to CNRI */
    /* Table-driven CRC-32 over `len` bytes starting at `bin_data`.
     * `crc` is the running checksum to continue from; it is bit-inverted
     * on entry and again on exit. Returns the updated checksum masked to
     * 32 bits. A non-positive `len` leaves the input untouched and simply
     * round-trips `crc` through the two inversions. */
    Py_ssize_t remaining;

    crc = ~crc;
    for (remaining = len; remaining > 0; remaining--) {
        /* Low byte of the running value, mixed with the next input byte,
         * selects the table entry. */
        unsigned int index = (crc ^ *bin_data) & 0xff;

        /* Note: (crc >> 8) MUST zero fill on left */
        crc = crc_32_tab[index] ^ (crc >> 8);
        bin_data++;
    }

    return (crc ^ 0xFFFFFFFF) & 0xffffffff;
}
#endif /* USE_ZLIB_CRC32 */

/*[clinic input]
Expand All @@ -754,34 +769,46 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/

#ifdef USE_ZLIB_CRC32
/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
/* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
* modules for historical reasons. */
{
const Byte *buf;
Py_ssize_t len;
int signed_val;

buf = (Byte*)data->buf;
len = data->len;
signed_val = crc32(crc, buf, len);
return (unsigned int)signed_val & 0xffffffffU;
/* Releasing the GIL for very small buffers is inefficient
and may lower performance */
if (data->len > 1024*5) {
unsigned char *buf = data->buf;
Py_ssize_t len = data->len;

Py_BEGIN_ALLOW_THREADS
/* Avoid truncation of length for very large buffers. crc32() takes
length as an unsigned int, which may be narrower than Py_ssize_t. */
while ((size_t)len > UINT_MAX) {
crc = crc32(crc, buf, UINT_MAX);
buf += (size_t) UINT_MAX;
len -= (size_t) UINT_MAX;
}
crc = crc32(crc, buf, (unsigned int)len);
Py_END_ALLOW_THREADS
} else {
crc = crc32(crc, data->buf, (unsigned int)data->len);
}
return crc & 0xffffffff;
}
#else /* USE_ZLIB_CRC32 */
{ /* By Jim Ahlstrom; All rights transferred to CNRI */
const unsigned char *bin_data;
Py_ssize_t len;
unsigned int result;

bin_data = data->buf;
len = data->len;

crc = ~ crc;
while (len-- > 0) {
crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
/* Note: (crc >> 8) MUST zero fill on left */
{
const unsigned char *bin_data = data->buf;
Py_ssize_t len = data->len;

/* Releasing the GIL for very small buffers is inefficient
and may lower performance */
if (len > 1024*5) {
unsigned int result;
Py_BEGIN_ALLOW_THREADS
result = internal_crc32(bin_data, len, crc);
Py_END_ALLOW_THREADS
return result;
} else {
return internal_crc32(bin_data, len, crc);
}

result = (crc ^ 0xFFFFFFFF);
return result & 0xffffffff;
}
#endif /* USE_ZLIB_CRC32 */

Expand Down
11 changes: 8 additions & 3 deletions Modules/clinic/zlibmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Modules/zlibmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1420,7 +1420,7 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value)
}

/*[clinic input]
zlib.crc32
zlib.crc32 -> unsigned_int

data: Py_buffer
value: unsigned_int(bitwise=True) = 0
Expand All @@ -1432,9 +1432,9 @@ Compute a CRC-32 checksum of data.
The returned checksum is an integer.
[clinic start generated code]*/

static PyObject *
static unsigned int
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
/*[clinic end generated code: output=63499fa20af7ea25 input=26c3ed430fa00b4c]*/
/*[clinic end generated code: output=b217562e4fe6d6a6 input=1229cb2fb5ea948a]*/
{
/* Releasing the GIL for very small buffers is inefficient
and may lower performance */
Expand All @@ -1455,7 +1455,7 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
} else {
value = crc32(value, data->buf, (unsigned int)data->len);
}
return PyLong_FromUnsignedLong(value & 0xffffffffU);
return value;
}


Expand Down
4 changes: 3 additions & 1 deletion PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,9 @@
<ClCompile Include="..\Modules\arraymodule.c" />
<ClCompile Include="..\Modules\atexitmodule.c" />
<ClCompile Include="..\Modules\audioop.c" />
<ClCompile Include="..\Modules\binascii.c" />
<ClCompile Include="..\Modules\binascii.c">
<PreprocessorDefinitions>USE_ZLIB_CRC32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<ClCompile Include="..\Modules\cmathmodule.c" />
<ClCompile Include="..\Modules\_datetimemodule.c" />
<ClCompile Include="..\Modules\errnomodule.c" />
Expand Down