Thanks to visit codestin.com
Credit goes to github.com

Skip to content

gh-135239: simpler use of mutex in hashlib & co #135267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 44 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
5d8c093
add common object head for hashlib/hmac objects
picnixz Jun 8, 2025
81e3046
simplify digest computation
picnixz Jun 8, 2025
7f9f7b7
refactor update logic
picnixz Jun 8, 2025
15a4f2f
refactor alloc() logic
picnixz Jun 8, 2025
5cd828a
finalizing touches
picnixz Jun 8, 2025
63db1de
correct mutex usage
picnixz Jun 15, 2025
ea033a3
Revert 5cd828acdcfef753aee5eec7e13f07682af40f46
picnixz Jun 15, 2025
77baa67
revert some constructor changes
picnixz Jun 15, 2025
902759f
unconditionally lock when performing HASH updates
picnixz Jun 16, 2025
dde68c4
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 16, 2025
05c1e66
post-merge
picnixz Jun 16, 2025
db57278
do not guard against empty buffers for now
picnixz Jun 16, 2025
ead20a1
consistency fixes
picnixz Jun 16, 2025
68a6bbc
remove unused import
picnixz Jun 16, 2025
68f297e
correct naming for locked/unlocked versions
picnixz Jun 16, 2025
9817c3d
debug?
picnixz Jun 16, 2025
7c6842b
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 16, 2025
c14c87d
simplify HMAC
picnixz Jun 16, 2025
bfb5436
release the GIL for large buffers
picnixz Jun 16, 2025
923c05f
restore GIL_MINSIZE
picnixz Jun 16, 2025
55b2afa
correctly lock objects
picnixz Jun 16, 2025
5cd60d1
improve tests
picnixz Jun 16, 2025
a2fcbd5
fixup HMAC
picnixz Jun 16, 2025
417cee1
fixup
picnixz Jun 16, 2025
f350501
GIL protection
picnixz Jun 16, 2025
5c4009d
show WASI errors
picnixz Jun 16, 2025
8aec797
fix WASI
picnixz Jun 16, 2025
6db58dc
fix compilation
picnixz Jun 16, 2025
b1f9463
fix compilation
picnixz Jun 16, 2025
491b922
fix warnings
picnixz Jun 16, 2025
c048975
sync
picnixz Jun 17, 2025
c9044d2
fixup format string
picnixz Jun 17, 2025
6c08f0d
address review
picnixz Jun 17, 2025
7fd1396
reduce diff
picnixz Jun 20, 2025
f400a11
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 20, 2025
5e2daa8
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 20, 2025
4f9729e
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 20, 2025
06aaee0
Merge branch 'main' into perf/hashlib/mutex-135239
picnixz Jun 21, 2025
977c807
fixup
picnixz Jun 21, 2025
6d66fef
fixup
picnixz Jun 21, 2025
c9db0b1
make the test suite less slow
picnixz Jun 21, 2025
6ffdd1c
fix test when GIL_MINSIZE is changed
picnixz Jun 21, 2025
98ec915
defer cosmetics
picnixz Jun 21, 2025
398ddb3
Update Lib/test/test_hashlib.py
picnixz Jun 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 37 additions & 15 deletions Lib/test/test_hashlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,40 +1043,59 @@ def test_gil(self):

def test_sha256_gil(self):
gil_minsize = hashlib_helper.find_gil_minsize(['_sha2', '_hashlib'])
data = b'1' + b'#' * gil_minsize + b'1'
expected = hashlib.sha256(data).hexdigest()

m = hashlib.sha256()
m.update(b'1')
m.update(b'#' * gil_minsize)
m.update(b'1')
self.assertEqual(
m.hexdigest(),
'1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94'
)
self.assertEqual(m.hexdigest(), expected)

m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1')
self.assertEqual(
m.hexdigest(),
'1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94'
)
@threading_helper.reap_threads
@threading_helper.requires_working_threading()
def test_threaded_hashing_fast(self):
# Same as test_threaded_hashing_slow() but only tests "fast" functions
# since otherwise test_hashlib.py becomes too slow during development.
for name in ['md5', 'sha1', 'sha256', 'sha3_256', 'blake2s']:
if constructor := getattr(hashlib, name, None):
with self.subTest(name):
self.do_test_threaded_hashing(constructor, is_shake=False)
if shake_128 := getattr(hashlib, 'shake_128', None):
self.do_test_threaded_hashing(shake_128, is_shake=True)

@requires_resource('cpu')
@threading_helper.reap_threads
@threading_helper.requires_working_threading()
def test_threaded_hashing(self):
def test_threaded_hashing_slow(self):
for algorithm, constructors in self.constructors_to_test.items():
is_shake = algorithm in self.shakes
for constructor in constructors:
with self.subTest(constructor.__name__, is_shake=is_shake):
self.do_test_threaded_hashing(constructor, is_shake)

def do_test_threaded_hashing(self, constructor, is_shake):
# Updating the same hash object from several threads at once
# using data chunk sizes containing the same byte sequences.
#
# If the internal locks are working to prevent multiple
# updates on the same object from running at once, the resulting
# hash will be the same as doing it single threaded upfront.
hasher = hashlib.sha1()
#
# Be careful when choosing num_threads, len(smallest_data)
# and len(data) // len(smallest_data) as the obtained chunk
# size needs to satisfy some conditions below.
num_threads = 5
smallest_data = b'swineflu'
smallest_data = os.urandom(8)
data = smallest_data * 200000
expected_hash = hashlib.sha1(data*num_threads).hexdigest()

h1 = constructor(usedforsecurity=False)
h2 = constructor(data * num_threads, usedforsecurity=False)

def hash_in_chunks(chunk_size):
index = 0
while index < len(data):
hasher.update(data[index:index + chunk_size])
h1.update(data[index:index + chunk_size])
index += chunk_size

threads = []
Expand All @@ -1093,7 +1112,10 @@ def hash_in_chunks(chunk_size):
for thread in threads:
thread.join()

self.assertEqual(expected_hash, hasher.hexdigest())
if is_shake:
self.assertEqual(h1.hexdigest(16), h2.hexdigest(16))
else:
self.assertEqual(h1.hexdigest(), h2.hexdigest())

def test_get_fips_mode(self):
fips_mode = self.is_fips_mode
Expand Down
81 changes: 23 additions & 58 deletions Modules/_hashopenssl.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,21 +278,15 @@ get_hashlib_state(PyObject *module)
}

typedef struct {
PyObject_HEAD
HASHLIB_OBJECT_HEAD
EVP_MD_CTX *ctx; /* OpenSSL message digest context */
// Prevents undefined behavior via multiple threads entering the C API.
bool use_mutex;
PyMutex mutex; /* OpenSSL context lock */
} HASHobject;

#define HASHobject_CAST(op) ((HASHobject *)(op))

typedef struct {
PyObject_HEAD
HASHLIB_OBJECT_HEAD
HMAC_CTX *ctx; /* OpenSSL hmac context */
// Prevents undefined behavior via multiple threads entering the C API.
bool use_mutex;
PyMutex mutex; /* HMAC context lock */
} HMACobject;

#define HMACobject_CAST(op) ((HMACobject *)(op))
Expand Down Expand Up @@ -700,9 +694,9 @@ static int
_hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p)
{
int result;
ENTER_HASHLIB(self);
HASHLIB_ACQUIRE_LOCK(self);
result = EVP_MD_CTX_copy(new_ctx_p, self->ctx);
LEAVE_HASHLIB(self);
HASHLIB_RELEASE_LOCK(self);
if (result == 0) {
notify_smart_ssl_error_occurred_in(Py_STRINGIFY(EVP_MD_CTX_copy));
return -1;
Expand Down Expand Up @@ -802,27 +796,13 @@ _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj)
{
int result;
Py_buffer view;

GET_BUFFER_VIEW_OR_ERROUT(obj, &view);

if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) {
self->use_mutex = true;
}
if (self->use_mutex) {
Py_BEGIN_ALLOW_THREADS
PyMutex_Lock(&self->mutex);
result = _hashlib_HASH_hash(self, view.buf, view.len);
PyMutex_Unlock(&self->mutex);
Py_END_ALLOW_THREADS
} else {
result = _hashlib_HASH_hash(self, view.buf, view.len);
}

HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
self, view.len,
result = _hashlib_HASH_hash(self, view.buf, view.len)
);
PyBuffer_Release(&view);

if (result == -1)
return NULL;
Py_RETURN_NONE;
return result < 0 ? NULL : Py_None;
}

static PyMethodDef HASH_methods[] = {
Expand Down Expand Up @@ -1144,15 +1124,12 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj,
}

if (view.buf && view.len) {
if (view.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
result = _hashlib_HASH_hash(self, view.buf, view.len);
Py_END_ALLOW_THREADS
} else {
result = _hashlib_HASH_hash(self, view.buf, view.len);
}
/* Do not use self->mutex here as this is the constructor
* where it is not yet possible to have concurrent access. */
HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
view.len,
result = _hashlib_HASH_hash(self, view.buf, view.len)
);
if (result == -1) {
assert(PyErr_Occurred());
Py_CLEAR(self);
Expand Down Expand Up @@ -1813,9 +1790,9 @@ static int
locked_HMAC_CTX_copy(HMAC_CTX *new_ctx_p, HMACobject *self)
{
int result;
ENTER_HASHLIB(self);
HASHLIB_ACQUIRE_LOCK(self);
result = HMAC_CTX_copy(new_ctx_p, self->ctx);
LEAVE_HASHLIB(self);
HASHLIB_RELEASE_LOCK(self);
if (result == 0) {
notify_smart_ssl_error_occurred_in(Py_STRINGIFY(HMAC_CTX_copy));
return -1;
Expand Down Expand Up @@ -1846,24 +1823,12 @@ _hmac_update(HMACobject *self, PyObject *obj)
Py_buffer view = {0};

GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0);

if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) {
self->use_mutex = true;
}
if (self->use_mutex) {
Py_BEGIN_ALLOW_THREADS
PyMutex_Lock(&self->mutex);
r = HMAC_Update(self->ctx,
(const unsigned char *)view.buf,
(size_t)view.len);
PyMutex_Unlock(&self->mutex);
Py_END_ALLOW_THREADS
} else {
r = HMAC_Update(self->ctx,
(const unsigned char *)view.buf,
(size_t)view.len);
}

HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
self, view.len,
r = HMAC_Update(
self->ctx, (const unsigned char *)view.buf, (size_t)view.len
)
);
PyBuffer_Release(&view);

if (r == 0) {
Expand Down
57 changes: 20 additions & 37 deletions Modules/blake2module.c
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ type_to_impl(PyTypeObject *type)
}

typedef struct {
PyObject_HEAD
HASHLIB_OBJECT_HEAD
union {
Hacl_Hash_Blake2s_state_t *blake2s_state;
Hacl_Hash_Blake2b_state_t *blake2b_state;
Expand All @@ -364,8 +364,6 @@ typedef struct {
#endif
};
blake2_impl impl;
bool use_mutex;
PyMutex mutex;
} Blake2Object;

#define _Blake2Object_CAST(op) ((Blake2Object *)(op))
Expand Down Expand Up @@ -422,7 +420,7 @@ new_Blake2Object(PyTypeObject *type)
} while (0)

static void
update(Blake2Object *self, uint8_t *buf, Py_ssize_t len)
blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len)
{
switch (self->impl) {
// blake2b_256_state and blake2s_128_state must be if'd since
Expand Down Expand Up @@ -646,14 +644,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size,
if (data != NULL) {
Py_buffer buf;
GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
Py_BEGIN_ALLOW_THREADS
update(self, buf.buf, buf.len);
Py_END_ALLOW_THREADS
}
else {
update(self, buf.buf, buf.len);
}
/* Do not use self->mutex here as this is the constructor
* where it is not yet possible to have concurrent access. */
HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
buf.len,
blake2_update_unlocked(self, buf.buf, buf.len)
);
PyBuffer_Release(&buf);
}

Expand Down Expand Up @@ -744,7 +740,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size,
}

static int
blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy)
blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy)
{
assert(cpy != NULL);
#define BLAKE2_COPY(TYPE, STATE_ATTR) \
Expand Down Expand Up @@ -801,9 +797,9 @@ _blake2_blake2b_copy_impl(Blake2Object *self)
return NULL;
}

ENTER_HASHLIB(self);
rc = blake2_blake2b_copy_locked(self, cpy);
LEAVE_HASHLIB(self);
HASHLIB_ACQUIRE_LOCK(self);
rc = blake2_blake2b_copy_unlocked(self, cpy);
HASHLIB_RELEASE_LOCK(self);
if (rc < 0) {
Py_DECREF(cpy);
return NULL;
Expand All @@ -825,25 +821,12 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data)
/*[clinic end generated code: output=99330230068e8c99 input=ffc4aa6a6a225d31]*/
{
Py_buffer buf;

GET_BUFFER_VIEW_OR_ERROUT(data, &buf);

if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
self->use_mutex = true;
}
if (self->use_mutex) {
Py_BEGIN_ALLOW_THREADS
PyMutex_Lock(&self->mutex);
update(self, buf.buf, buf.len);
PyMutex_Unlock(&self->mutex);
Py_END_ALLOW_THREADS
}
else {
update(self, buf.buf, buf.len);
}

HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
self, buf.len,
blake2_update_unlocked(self, buf.buf, buf.len)
);
PyBuffer_Release(&buf);

Py_RETURN_NONE;
}

Expand Down Expand Up @@ -881,9 +864,9 @@ _blake2_blake2b_digest_impl(Blake2Object *self)
/*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/
{
uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES];
ENTER_HASHLIB(self);
HASHLIB_ACQUIRE_LOCK(self);
digest_length = blake2_blake2b_compute_digest(self, digest);
LEAVE_HASHLIB(self);
HASHLIB_RELEASE_LOCK(self);
return PyBytes_FromStringAndSize((const char *)digest, digest_length);
}

Expand All @@ -898,9 +881,9 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self)
/*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/
{
uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES];
ENTER_HASHLIB(self);
HASHLIB_ACQUIRE_LOCK(self);
digest_length = blake2_blake2b_compute_digest(self, digest);
LEAVE_HASHLIB(self);
HASHLIB_RELEASE_LOCK(self);
return _Py_strhex((const char *)digest, digest_length);
}

Expand Down
Loading
Loading