From 5d8c093b4937bc41cff7a911464e8ce5bab3700c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 16:24:43 +0200 Subject: [PATCH 01/41] add common object head for hashlib/hmac objects --- Modules/hashlib.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Modules/hashlib.h b/Modules/hashlib.h index e82ec92be25c57..23475afb683d93 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -49,6 +49,14 @@ */ #include "pythread.h" + +#define HASHLIB_OBJECT_HEAD \ + PyObject_HEAD \ + /* prevent undefined behavior via multiple + * threads entering the C API */ \ + bool use_mutex; \ + PyMutex mutex; + #define ENTER_HASHLIB(obj) \ if ((obj)->use_mutex) { \ PyMutex_Lock(&(obj)->mutex); \ From 81e30463b76cac7266bf88af645f390da7263da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 16:41:10 +0200 Subject: [PATCH 02/41] simplify digest computation --- Modules/md5module.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 9b5ea2d6e02605..46f23ea8e32151 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -22,6 +22,7 @@ #include "Python.h" #include "hashlib.h" +#include "pycore_strhex.h" // _Py_strhex() /*[clinic input] module _md5 @@ -126,6 +127,14 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) return (PyObject *)newobj; } +static void +md5_digest_compute_cond_lock(MD5object *self, uint8_t *digest) +{ + ENTER_HASHLIB(self); + Hacl_Hash_MD5_digest(self->hash_state, digest); + LEAVE_HASHLIB(self); +} + /*[clinic input] MD5Type.digest @@ -136,10 +145,8 @@ static PyObject * MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { - unsigned char digest[MD5_DIGESTSIZE]; - ENTER_HASHLIB(self); - Hacl_Hash_MD5_digest(self->hash_state, 
digest); - LEAVE_HASHLIB(self); + uint8_t digest[MD5_DIGESTSIZE]; + md5_digest_compute_cond_lock(self, digest); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -153,20 +160,9 @@ static PyObject * MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { - unsigned char digest[MD5_DIGESTSIZE]; - ENTER_HASHLIB(self); - Hacl_Hash_MD5_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); - - const char *hexdigits = "0123456789abcdef"; - char digest_hex[MD5_DIGESTSIZE * 2]; - char *str = digest_hex; - for (size_t i=0; i < MD5_DIGESTSIZE; i++) { - unsigned char byte = digest[i]; - *str++ = hexdigits[byte >> 4]; - *str++ = hexdigits[byte & 0x0f]; - } - return PyUnicode_FromStringAndSize(digest_hex, sizeof(digest_hex)); + uint8_t digest[MD5_DIGESTSIZE]; + md5_digest_compute_cond_lock(self, digest); + return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); } static void From 7f9f7b746d6dd5b0add1826138c9d8c8f3edc48e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 16:41:59 +0200 Subject: [PATCH 03/41] refactor update logic --- Modules/md5module.c | 51 ++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 46f23ea8e32151..2cdf889589415c 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -166,7 +166,7 @@ MD5Type_hexdigest_impl(MD5object *self) } static void -update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) +_hacl_md5_update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) { /* * Note: we explicitly ignore the error code on the basis that it would @@ -184,6 +184,36 @@ update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) (void)Hacl_Hash_MD5_update(state, buf, (uint32_t)len); } +static void +md5_update_state_with_lock(MD5object *self, uint8_t 
*buf, Py_ssize_t len) +{ + Py_BEGIN_ALLOW_THREADS + PyMutex_Lock(&self->mutex); // unconditionally acquire a lock + _hacl_md5_update(self->hash_state, buf, len); + PyMutex_Unlock(&self->mutex); + Py_END_ALLOW_THREADS +} + +static void +md5_update_state_cond_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) +{ + ENTER_HASHLIB(self); // conditionally acquire a lock + _hacl_md5_update(self->hash_state, buf, len); + LEAVE_HASHLIB(self); +} + +static inline void +md5_update_state(MD5object *self, uint8_t *buf, Py_ssize_t len) +{ + assert(buf != 0); + assert(len >= 0); + if (len != 0) { + len < HASHLIB_GIL_MINSIZE + ? md5_update_state_cond_lock(self, buf, len) + : md5_update_state_with_lock(self, buf, len); + } +} + /*[clinic input] MD5Type.update @@ -200,20 +230,7 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update(self->hash_state, buf.buf, buf.len); - } - + md5_update_state(self, buf.buf, buf.len); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -319,11 +336,11 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); + _hacl_md5_update(new->hash_state, buf.buf, buf.len); Py_END_ALLOW_THREADS } else { - update(new->hash_state, buf.buf, buf.len); + _hacl_md5_update(new->hash_state, buf.buf, buf.len); } PyBuffer_Release(&buf); } From 15a4f2fad0e621e9f07ff342e9188678165679bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 16:42:18 +0200 Subject: [PATCH 04/41] refactor alloc() logic --- Modules/md5module.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 2cdf889589415c..94029922a6e41d 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -39,10 +39,7 @@ class MD5Type "MD5object *" "&PyType_Type" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_OBJECT_HEAD Hacl_Hash_MD5_state_t *hash_state; } MD5object; @@ -308,30 +305,20 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } MD5object *new; - Py_buffer buf; - - if (string) { - GET_BUFFER_VIEW_OR_ERROUT(string, &buf); - } - MD5State *st = md5_get_state(module); if ((new = newMD5object(st)) == NULL) { - if (string) { - PyBuffer_Release(&buf); - } return NULL; } new->hash_state = Hacl_Hash_MD5_malloc(); if (new->hash_state == NULL) { Py_DECREF(new); - if (string) { - PyBuffer_Release(&buf); - } return PyErr_NoMemory(); } if (string) { + Py_buffer buf; + GET_BUFFER_VIEW_OR_ERROR(string, &buf, goto error); if (buf.len >= HASHLIB_GIL_MINSIZE) { /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ @@ -346,6 +333,10 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } return (PyObject *)new; + +error: + Py_XDECREF(new); + return NULL; } From 5cd828acdcfef753aee5eec7e13f07682af40f46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 16:46:21 +0200 Subject: [PATCH 05/41] finalizing touches --- Modules/md5module.c | 114 +++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 59 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 94029922a6e41d..69f787ca5ebc32 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -24,12 +24,6 @@ #include "hashlib.h" #include "pycore_strhex.h" // _Py_strhex() -/*[clinic input] -module _md5 -class MD5Type "MD5object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ - /* The MD5 block size and message digest sizes, in bytes */ #define MD5_BLOCKSIZE 64 @@ -37,62 +31,64 @@ class MD5Type "MD5object *" "&PyType_Type" #include "_hacl/Hacl_Hash_MD5.h" - typedef struct { HASHLIB_OBJECT_HEAD - Hacl_Hash_MD5_state_t *hash_state; + Hacl_Hash_MD5_state_t *state; } MD5object; #define _MD5object_CAST(op) ((MD5object *)(op)) -#include "clinic/md5module.c.h" - - typedef struct { - PyTypeObject* md5_type; -} MD5State; + PyTypeObject *md5_type; +} md5module_state; -static inline MD5State* -md5_get_state(PyObject *module) +static inline md5module_state * +get_md5module_state(PyObject *module) { void *state = PyModule_GetState(module); assert(state != NULL); - return (MD5State *)state; + return (md5module_state *)state; } +/*[clinic input] +module _md5 +class MD5Type "MD5object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ + +#include "clinic/md5module.c.h" + static MD5object * -newMD5object(MD5State * st) 
+newMD5object(md5module_state *st) { - MD5object *md5 = PyObject_GC_New(MD5object, st->md5_type); - if (!md5) { + MD5object *self = PyObject_GC_New(MD5object, st->md5_type); + if (self == NULL) { return NULL; } - HASHLIB_INIT_MUTEX(md5); - - PyObject_GC_Track(md5); - return md5; + HASHLIB_INIT_MUTEX(self); + PyObject_GC_Track(self); + return self; } /* Internal methods for a hash object */ static int -MD5_traverse(PyObject *ptr, visitproc visit, void *arg) +MD5_traverse(PyObject *op, visitproc visit, void *arg) { - Py_VISIT(Py_TYPE(ptr)); + Py_VISIT(Py_TYPE(op)); return 0; } static void MD5_dealloc(PyObject *op) { - MD5object *ptr = _MD5object_CAST(op); - Hacl_Hash_MD5_free(ptr->hash_state); + MD5object *self = _MD5object_CAST(op); + Hacl_Hash_MD5_free(self->state); PyTypeObject *tp = Py_TYPE(op); - PyObject_GC_UnTrack(ptr); - PyObject_GC_Del(ptr); + PyObject_GC_UnTrack(self); + PyObject_GC_Del(self); Py_DECREF(tp); } - /* External methods for a hash object */ /*[clinic input] @@ -107,28 +103,28 @@ static PyObject * MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) /*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/ { - MD5State *st = PyType_GetModuleState(cls); + md5module_state *st = PyType_GetModuleState(cls); - MD5object *newobj; - if ((newobj = newMD5object(st)) == NULL) { + MD5object *copy = newMD5object(st); + if (copy == NULL) { return NULL; } ENTER_HASHLIB(self); - newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); + copy->state = Hacl_Hash_MD5_copy(self->state); LEAVE_HASHLIB(self); - if (newobj->hash_state == NULL) { - Py_DECREF(self); + if (copy->state == NULL) { + Py_DECREF(copy); return PyErr_NoMemory(); } - return (PyObject *)newobj; + return (PyObject *)copy; } static void md5_digest_compute_cond_lock(MD5object *self, uint8_t *digest) { ENTER_HASHLIB(self); - Hacl_Hash_MD5_digest(self->hash_state, digest); + Hacl_Hash_MD5_digest(self->state, digest); LEAVE_HASHLIB(self); } @@ -186,7 +182,7 @@ 
md5_update_state_with_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) { Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - _hacl_md5_update(self->hash_state, buf, len); + _hacl_md5_update(self->state, buf, len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS } @@ -195,7 +191,7 @@ static void md5_update_state_cond_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) { ENTER_HASHLIB(self); // conditionally acquire a lock - _hacl_md5_update(self->hash_state, buf, len); + _hacl_md5_update(self->state, buf, len); LEAVE_HASHLIB(self); } @@ -237,7 +233,7 @@ static PyMethodDef MD5_methods[] = { MD5TYPE_DIGEST_METHODDEF MD5TYPE_HEXDIGEST_METHODDEF MD5TYPE_UPDATE_METHODDEF - {NULL, NULL} /* sentinel */ + {NULL, NULL} /* sentinel */ }; static PyObject * @@ -262,7 +258,7 @@ static PyGetSetDef MD5_getseters[] = { {"block_size", MD5_get_block_size, NULL, NULL, NULL}, {"name", MD5_get_name, NULL, NULL, NULL}, {"digest_size", md5_get_digest_size, NULL, NULL, NULL}, - {NULL} /* Sentinel */ + {NULL} /* sentinel */ }; static PyType_Slot md5_type_slots[] = { @@ -270,12 +266,12 @@ static PyType_Slot md5_type_slots[] = { {Py_tp_methods, MD5_methods}, {Py_tp_getset, MD5_getseters}, {Py_tp_traverse, MD5_traverse}, - {0,0} + {0, 0} }; static PyType_Spec md5_type_spec = { .name = "_md5.md5", - .basicsize = sizeof(MD5object), + .basicsize = sizeof(MD5object), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC), .slots = md5_type_slots @@ -304,15 +300,15 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, return NULL; } - MD5object *new; - MD5State *st = md5_get_state(module); - if ((new = newMD5object(st)) == NULL) { + md5module_state *st = get_md5module_state(module); + MD5object *self = newMD5object(st); + if (self == NULL) { return NULL; } - new->hash_state = Hacl_Hash_MD5_malloc(); - if (new->hash_state == NULL) { - Py_DECREF(new); + self->state = 
Hacl_Hash_MD5_malloc(); + if (self->state == NULL) { + Py_DECREF(self); return PyErr_NoMemory(); } @@ -323,19 +319,19 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. */ Py_BEGIN_ALLOW_THREADS - _hacl_md5_update(new->hash_state, buf.buf, buf.len); + _hacl_md5_update(self->state, buf.buf, buf.len); Py_END_ALLOW_THREADS } else { - _hacl_md5_update(new->hash_state, buf.buf, buf.len); + _hacl_md5_update(self->state, buf.buf, buf.len); } PyBuffer_Release(&buf); } - return (PyObject *)new; + return (PyObject *)self; error: - Py_XDECREF(new); + Py_XDECREF(self); return NULL; } @@ -344,13 +340,13 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, static struct PyMethodDef MD5_functions[] = { _MD5_MD5_METHODDEF - {NULL, NULL} /* Sentinel */ + {NULL, NULL} /* sentinel */ }; static int _md5_traverse(PyObject *module, visitproc visit, void *arg) { - MD5State *state = md5_get_state(module); + md5module_state *state = get_md5module_state(module); Py_VISIT(state->md5_type); return 0; } @@ -358,7 +354,7 @@ _md5_traverse(PyObject *module, visitproc visit, void *arg) static int _md5_clear(PyObject *module) { - MD5State *state = md5_get_state(module); + md5module_state *state = get_md5module_state(module); Py_CLEAR(state->md5_type); return 0; } @@ -373,7 +369,7 @@ _md5_free(void *module) static int md5_exec(PyObject *m) { - MD5State *st = md5_get_state(m); + md5module_state *st = get_md5module_state(m); st->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &md5_type_spec, NULL); @@ -399,7 +395,7 @@ static PyModuleDef_Slot _md5_slots[] = { static struct PyModuleDef _md5module = { PyModuleDef_HEAD_INIT, .m_name = "_md5", - .m_size = sizeof(MD5State), + .m_size = sizeof(md5module_state), .m_methods = MD5_functions, .m_slots = _md5_slots, .m_traverse = _md5_traverse, From 63db1de8317c569cb6f251c3546be9b98c71bd4c Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 10:56:34 +0200 Subject: [PATCH 06/41] correct mutex usage --- Modules/hashlib.h | 51 ++++++++++++++++++++++++++------------------- Modules/md5module.c | 23 +++++++++++++------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 23475afb683d93..fe12acb27ce036 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -50,33 +50,42 @@ #include "pythread.h" -#define HASHLIB_OBJECT_HEAD \ - PyObject_HEAD \ - /* prevent undefined behavior via multiple - * threads entering the C API */ \ - bool use_mutex; \ +#define HASHLIB_LOCK_HEAD \ + /* + * Attributes to prevent undefined behaviors + * via multiple threads entering the C API. + */ \ + bool use_mutex; \ PyMutex mutex; -#define ENTER_HASHLIB(obj) \ - if ((obj)->use_mutex) { \ - PyMutex_Lock(&(obj)->mutex); \ - } -#define LEAVE_HASHLIB(obj) \ - if ((obj)->use_mutex) { \ - PyMutex_Unlock(&(obj)->mutex); \ - } +#define HASHLIB_SET_MUTEX_POLICY(OBJ, VALUE) \ + _Py_atomic_store_int_relaxed((int *)&(OBJ)->use_mutex, (int)(VALUE)) + +#define ENTER_HASHLIB(OBJ) \ + do { \ + if (_Py_atomic_load_int_relaxed((const int *)&(OBJ)->use_mutex)) { \ + PyMutex_Lock(&(OBJ)->mutex); \ + } \ + } while (0) + +#define LEAVE_HASHLIB(OBJ) \ + do { \ + if (_Py_atomic_load_int_relaxed((const int *)&(OBJ)->use_mutex)) { \ + PyMutex_Unlock(&(OBJ)->mutex); \ + } \ + } while (0) #ifdef Py_GIL_DISABLED -#define HASHLIB_INIT_MUTEX(obj) \ - do { \ - (obj)->mutex = (PyMutex){0}; \ - (obj)->use_mutex = true; \ +#define HASHLIB_INIT_MUTEX(OBJ) \ + do { \ + (OBJ)->mutex = (PyMutex){0}; \ + (OBJ)->use_mutex = true; \ } while (0) #else -#define HASHLIB_INIT_MUTEX(obj) \ - do { \ - (obj)->mutex = (PyMutex){0}; \ - (obj)->use_mutex = false; \ +#define HASHLIB_INIT_MUTEX(OBJ) \ + do { \ + (OBJ)->mutex = (PyMutex){0}; \ + (OBJ)->use_mutex = false; \ } while (0) #endif diff 
--git a/Modules/md5module.c b/Modules/md5module.c index 69f787ca5ebc32..8500d933f91f02 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -32,7 +32,8 @@ #include "_hacl/Hacl_Hash_MD5.h" typedef struct { - HASHLIB_OBJECT_HEAD + PyObject_HEAD + HASHLIB_LOCK_HEAD Hacl_Hash_MD5_state_t *state; } MD5object; @@ -181,7 +182,7 @@ static void md5_update_state_with_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); // unconditionally acquire a lock + PyMutex_Lock(&self->mutex); // unconditionally acquire a lock _hacl_md5_update(self->state, buf, len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS @@ -190,7 +191,7 @@ md5_update_state_with_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) static void md5_update_state_cond_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) { - ENTER_HASHLIB(self); // conditionally acquire a lock + ENTER_HASHLIB(self); // conditionally acquire a lock _hacl_md5_update(self->state, buf, len); LEAVE_HASHLIB(self); } @@ -200,10 +201,16 @@ md5_update_state(MD5object *self, uint8_t *buf, Py_ssize_t len) { assert(buf != 0); assert(len >= 0); - if (len != 0) { - len < HASHLIB_GIL_MINSIZE - ? md5_update_state_cond_lock(self, buf, len) - : md5_update_state_with_lock(self, buf, len); + if (len == 0) { + return; + } + if (len < HASHLIB_GIL_MINSIZE) { + md5_update_state_cond_lock(self, buf, len); + } + else { + HASHLIB_SET_MUTEX_POLICY(self, 1); + md5_update_state_with_lock(self, buf, len); + HASHLIB_SET_MUTEX_POLICY(self, 0); } } @@ -316,7 +323,7 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, Py_buffer buf; GET_BUFFER_VIEW_OR_ERROR(string, &buf, goto error); if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor + /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ Py_BEGIN_ALLOW_THREADS _hacl_md5_update(self->state, buf.buf, buf.len); From ea033a3f02b4318463b6fb976912bdc2f9ab7e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 11:42:41 +0200 Subject: [PATCH 07/41] Revert 5cd828acdcfef753aee5eec7e13f07682af40f46 --- Modules/md5module.c | 114 +++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 8500d933f91f02..db25c2bb971a05 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -24,6 +24,12 @@ #include "hashlib.h" #include "pycore_strhex.h" // _Py_strhex() +/*[clinic input] +module _md5 +class MD5Type "MD5object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ + /* The MD5 block size and message digest sizes, in bytes */ #define MD5_BLOCKSIZE 64 @@ -31,65 +37,63 @@ #include "_hacl/Hacl_Hash_MD5.h" + typedef struct { PyObject_HEAD HASHLIB_LOCK_HEAD - Hacl_Hash_MD5_state_t *state; + Hacl_Hash_MD5_state_t *hash_state; } MD5object; #define _MD5object_CAST(op) ((MD5object *)(op)) +#include "clinic/md5module.c.h" + + typedef struct { - PyTypeObject *md5_type; -} md5module_state; + PyTypeObject* md5_type; +} MD5State; -static inline md5module_state * -get_md5module_state(PyObject *module) +static inline MD5State* +md5_get_state(PyObject *module) { void *state = PyModule_GetState(module); assert(state != NULL); - return (md5module_state *)state; + return (MD5State *)state; } -/*[clinic input] -module _md5 -class MD5Type "MD5object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ - -#include "clinic/md5module.c.h" - static MD5object * -newMD5object(md5module_state *st) +newMD5object(MD5State * st) { - MD5object *self = PyObject_GC_New(MD5object, st->md5_type); - if (self == 
NULL) { + MD5object *md5 = PyObject_GC_New(MD5object, st->md5_type); + if (!md5) { return NULL; } - HASHLIB_INIT_MUTEX(self); - PyObject_GC_Track(self); - return self; + HASHLIB_INIT_MUTEX(md5); + + PyObject_GC_Track(md5); + return md5; } /* Internal methods for a hash object */ static int -MD5_traverse(PyObject *op, visitproc visit, void *arg) +MD5_traverse(PyObject *ptr, visitproc visit, void *arg) { - Py_VISIT(Py_TYPE(op)); + Py_VISIT(Py_TYPE(ptr)); return 0; } static void MD5_dealloc(PyObject *op) { - MD5object *self = _MD5object_CAST(op); - Hacl_Hash_MD5_free(self->state); + MD5object *ptr = _MD5object_CAST(op); + Hacl_Hash_MD5_free(ptr->hash_state); PyTypeObject *tp = Py_TYPE(op); - PyObject_GC_UnTrack(self); - PyObject_GC_Del(self); + PyObject_GC_UnTrack(ptr); + PyObject_GC_Del(ptr); Py_DECREF(tp); } + /* External methods for a hash object */ /*[clinic input] @@ -104,28 +108,28 @@ static PyObject * MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) /*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/ { - md5module_state *st = PyType_GetModuleState(cls); + MD5State *st = PyType_GetModuleState(cls); - MD5object *copy = newMD5object(st); - if (copy == NULL) { + MD5object *newobj; + if ((newobj = newMD5object(st)) == NULL) { return NULL; } ENTER_HASHLIB(self); - copy->state = Hacl_Hash_MD5_copy(self->state); + newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); LEAVE_HASHLIB(self); - if (copy->state == NULL) { - Py_DECREF(copy); + if (newobj->hash_state == NULL) { + Py_DECREF(self); return PyErr_NoMemory(); } - return (PyObject *)copy; + return (PyObject *)newobj; } static void md5_digest_compute_cond_lock(MD5object *self, uint8_t *digest) { ENTER_HASHLIB(self); - Hacl_Hash_MD5_digest(self->state, digest); + Hacl_Hash_MD5_digest(self->hash_state, digest); LEAVE_HASHLIB(self); } @@ -183,7 +187,7 @@ md5_update_state_with_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) { Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); // 
unconditionally acquire a lock - _hacl_md5_update(self->state, buf, len); + _hacl_md5_update(self->hash_state, buf, len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS } @@ -192,7 +196,7 @@ static void md5_update_state_cond_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) { ENTER_HASHLIB(self); // conditionally acquire a lock - _hacl_md5_update(self->state, buf, len); + _hacl_md5_update(self->hash_state, buf, len); LEAVE_HASHLIB(self); } @@ -240,7 +244,7 @@ static PyMethodDef MD5_methods[] = { MD5TYPE_DIGEST_METHODDEF MD5TYPE_HEXDIGEST_METHODDEF MD5TYPE_UPDATE_METHODDEF - {NULL, NULL} /* sentinel */ + {NULL, NULL} /* sentinel */ }; static PyObject * @@ -265,7 +269,7 @@ static PyGetSetDef MD5_getseters[] = { {"block_size", MD5_get_block_size, NULL, NULL, NULL}, {"name", MD5_get_name, NULL, NULL, NULL}, {"digest_size", md5_get_digest_size, NULL, NULL, NULL}, - {NULL} /* sentinel */ + {NULL} /* Sentinel */ }; static PyType_Slot md5_type_slots[] = { @@ -273,12 +277,12 @@ static PyType_Slot md5_type_slots[] = { {Py_tp_methods, MD5_methods}, {Py_tp_getset, MD5_getseters}, {Py_tp_traverse, MD5_traverse}, - {0, 0} + {0,0} }; static PyType_Spec md5_type_spec = { .name = "_md5.md5", - .basicsize = sizeof(MD5object), + .basicsize = sizeof(MD5object), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC), .slots = md5_type_slots @@ -307,15 +311,15 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, return NULL; } - md5module_state *st = get_md5module_state(module); - MD5object *self = newMD5object(st); - if (self == NULL) { + MD5object *new; + MD5State *st = md5_get_state(module); + if ((new = newMD5object(st)) == NULL) { return NULL; } - self->state = Hacl_Hash_MD5_malloc(); - if (self->state == NULL) { - Py_DECREF(self); + new->hash_state = Hacl_Hash_MD5_malloc(); + if (new->hash_state == NULL) { + Py_DECREF(new); return PyErr_NoMemory(); } @@ -326,19 +330,19 @@ _md5_md5_impl(PyObject 
*module, PyObject *data, int usedforsecurity, /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ Py_BEGIN_ALLOW_THREADS - _hacl_md5_update(self->state, buf.buf, buf.len); + _hacl_md5_update(new->hash_state, buf.buf, buf.len); Py_END_ALLOW_THREADS } else { - _hacl_md5_update(self->state, buf.buf, buf.len); + _hacl_md5_update(new->hash_state, buf.buf, buf.len); } PyBuffer_Release(&buf); } - return (PyObject *)self; + return (PyObject *)new; error: - Py_XDECREF(self); + Py_XDECREF(new); return NULL; } @@ -347,13 +351,13 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, static struct PyMethodDef MD5_functions[] = { _MD5_MD5_METHODDEF - {NULL, NULL} /* sentinel */ + {NULL, NULL} /* Sentinel */ }; static int _md5_traverse(PyObject *module, visitproc visit, void *arg) { - md5module_state *state = get_md5module_state(module); + MD5State *state = md5_get_state(module); Py_VISIT(state->md5_type); return 0; } @@ -361,7 +365,7 @@ _md5_traverse(PyObject *module, visitproc visit, void *arg) static int _md5_clear(PyObject *module) { - md5module_state *state = get_md5module_state(module); + MD5State *state = md5_get_state(module); Py_CLEAR(state->md5_type); return 0; } @@ -376,7 +380,7 @@ _md5_free(void *module) static int md5_exec(PyObject *m) { - md5module_state *st = get_md5module_state(m); + MD5State *st = md5_get_state(m); st->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &md5_type_spec, NULL); @@ -402,7 +406,7 @@ static PyModuleDef_Slot _md5_slots[] = { static struct PyModuleDef _md5module = { PyModuleDef_HEAD_INIT, .m_name = "_md5", - .m_size = sizeof(md5module_state), + .m_size = sizeof(MD5State), .m_methods = MD5_functions, .m_slots = _md5_slots, .m_traverse = _md5_traverse, From 77baa67c9e4b4cbb9bdec938360dc241b0f58034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 11:44:38 +0200 
Subject: [PATCH 08/41] revert some constructor changes --- Modules/md5module.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index db25c2bb971a05..a05fd9d591fdac 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -312,20 +312,30 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } MD5object *new; + Py_buffer buf; + + if (string) { + GET_BUFFER_VIEW_OR_ERROUT(string, &buf); + } + MD5State *st = md5_get_state(module); if ((new = newMD5object(st)) == NULL) { + if (string) { + PyBuffer_Release(&buf); + } return NULL; } new->hash_state = Hacl_Hash_MD5_malloc(); if (new->hash_state == NULL) { Py_DECREF(new); + if (string) { + PyBuffer_Release(&buf); + } return PyErr_NoMemory(); } if (string) { - Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROR(string, &buf, goto error); if (buf.len >= HASHLIB_GIL_MINSIZE) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ @@ -340,10 +350,6 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } return (PyObject *)new; - -error: - Py_XDECREF(new); - return NULL; } From 7b7b1a365deded43f0cc2c2804cd0182e2abb7c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 11:58:38 +0200 Subject: [PATCH 09/41] use a macro for the mutex API --- Modules/blake2module.c | 3 +-- Modules/hashlib.h | 8 ++++++++ Modules/hmacmodule.c | 4 +--- Modules/md5module.c | 4 +--- Modules/sha1module.c | 5 +---- Modules/sha2module.c | 8 ++------ Modules/sha3module.c | 4 +--- 7 files changed, 15 insertions(+), 21 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 2ce8c0cd3d7b6f..a4fe1b90473df4 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -353,6 +353,7 @@ type_to_impl(PyTypeObject *type) typedef struct { PyObject_HEAD + HASHLIB_MUTEX_API union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; @@ -364,8 +365,6 @@ typedef struct { #endif }; blake2_impl impl; - bool use_mutex; - PyMutex mutex; } Blake2Object; #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) diff --git a/Modules/hashlib.h b/Modules/hashlib.h index e82ec92be25c57..042ffd699decce 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -2,6 +2,14 @@ #include "pycore_lock.h" // PyMutex +#define HASHLIB_MUTEX_API \ + /* + * Attributes to prevent undefined behaviors + * via multiple threads entering the C API. + */ \ + bool use_mutex; \ + PyMutex mutex; + /* * Given a PyObject* obj, fill in the Py_buffer* viewp with the result * of PyObject_GetBuffer. 
Sets an exception and issues the erraction diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index b404d5732ec857..361129a406fe39 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -383,9 +383,7 @@ typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; typedef struct HMACObject { PyObject_HEAD - - bool use_mutex; - PyMutex mutex; + HASHLIB_MUTEX_API // Hash function information PyObject *name; // rendered name (exact unicode object) diff --git a/Modules/md5module.c b/Modules/md5module.c index 08dbcd2cbce844..d10723c329cab6 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -39,9 +39,7 @@ class MD5Type "MD5object *" "&PyType_Type" typedef struct { PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_MUTEX_API Hacl_Hash_MD5_state_t *hash_state; } MD5object; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index a746bf74f8d4c1..aadbaa98edc276 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -39,10 +39,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" typedef struct { PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; - PyThread_type_lock lock; + HASHLIB_MUTEX_API Hacl_Hash_SHA1_state_t *hash_state; } SHA1object; diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 72931910c5d720..250c08d105b8f5 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -52,18 +52,14 @@ class SHA512Type "SHA512object *" "&PyType_Type" typedef struct { PyObject_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_MUTEX_API Hacl_Hash_SHA2_state_t_256 *state; } SHA256object; typedef struct { PyObject_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. 
- bool use_mutex; - PyMutex mutex; + HASHLIB_MUTEX_API Hacl_Hash_SHA2_state_t_512 *state; } SHA512object; diff --git a/Modules/sha3module.c b/Modules/sha3module.c index cfbf0cbcc042c5..76e5d69d32f77d 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -60,9 +60,7 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" typedef struct { PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_MUTEX_API Hacl_Hash_SHA3_state_t *hash_state; } SHA3object; From 0dcc799210549760f1d6a1e3ac91b799d0674fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 12:00:02 +0200 Subject: [PATCH 10/41] use 'state' attribute instead of 'hash_state' --- Modules/md5module.c | 24 ++++++++++---------- Modules/sha1module.c | 28 +++++++++++------------ Modules/sha3module.c | 54 ++++++++++++++++++++++---------------------- 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index d10723c329cab6..459056dc1fa4f2 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -40,7 +40,7 @@ class MD5Type "MD5object *" "&PyType_Type" typedef struct { PyObject_HEAD HASHLIB_MUTEX_API - Hacl_Hash_MD5_state_t *hash_state; + Hacl_Hash_MD5_state_t *state; } MD5object; #define _MD5object_CAST(op) ((MD5object *)(op)) @@ -85,7 +85,7 @@ static void MD5_dealloc(PyObject *op) { MD5object *ptr = _MD5object_CAST(op); - Hacl_Hash_MD5_free(ptr->hash_state); + Hacl_Hash_MD5_free(ptr->state); PyTypeObject *tp = Py_TYPE(op); PyObject_GC_UnTrack(ptr); PyObject_GC_Del(ptr); @@ -115,9 +115,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) } ENTER_HASHLIB(self); - newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); + newobj->state = Hacl_Hash_MD5_copy(self->state); LEAVE_HASHLIB(self); - if (newobj->hash_state == NULL) { + if (newobj->state == NULL) { Py_DECREF(newobj); return 
PyErr_NoMemory(); } @@ -136,7 +136,7 @@ MD5Type_digest_impl(MD5object *self) { unsigned char digest[MD5_DIGESTSIZE]; ENTER_HASHLIB(self); - Hacl_Hash_MD5_digest(self->hash_state, digest); + Hacl_Hash_MD5_digest(self->state, digest); LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -153,7 +153,7 @@ MD5Type_hexdigest_impl(MD5object *self) { unsigned char digest[MD5_DIGESTSIZE]; ENTER_HASHLIB(self); - Hacl_Hash_MD5_digest(self->hash_state, digest); + Hacl_Hash_MD5_digest(self->state, digest); LEAVE_HASHLIB(self); const char *hexdigits = "0123456789abcdef"; @@ -209,11 +209,11 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) if (self->use_mutex) { Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); + update(self->state, buf.buf, buf.len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS } else { - update(self->hash_state, buf.buf, buf.len); + update(self->state, buf.buf, buf.len); } PyBuffer_Release(&buf); @@ -307,8 +307,8 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, return NULL; } - new->hash_state = Hacl_Hash_MD5_malloc(); - if (new->hash_state == NULL) { + new->state = Hacl_Hash_MD5_malloc(); + if (new->state == NULL) { Py_DECREF(new); if (string) { PyBuffer_Release(&buf); @@ -321,11 +321,11 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); + update(new->state, buf.buf, buf.len); Py_END_ALLOW_THREADS } else { - update(new->hash_state, buf.buf, buf.len); + update(new->state, buf.buf, buf.len); } PyBuffer_Release(&buf); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index aadbaa98edc276..eb3214813033bf 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -40,7 +40,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" typedef struct { PyObject_HEAD HASHLIB_MUTEX_API - Hacl_Hash_SHA1_state_t *hash_state; + Hacl_Hash_SHA1_state_t *state; } SHA1object; #define _SHA1object_CAST(op) ((SHA1object *)(op)) @@ -86,9 +86,9 @@ static void SHA1_dealloc(PyObject *op) { SHA1object *ptr = _SHA1object_CAST(op); - if (ptr->hash_state != NULL) { - Hacl_Hash_SHA1_free(ptr->hash_state); - ptr->hash_state = NULL; + if (ptr->state != NULL) { + Hacl_Hash_SHA1_free(ptr->state); + ptr->state = NULL; } PyTypeObject *tp = Py_TYPE(ptr); PyObject_GC_UnTrack(ptr); @@ -119,9 +119,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) } ENTER_HASHLIB(self); - newobj->hash_state = Hacl_Hash_SHA1_copy(self->hash_state); + newobj->state = Hacl_Hash_SHA1_copy(self->state); LEAVE_HASHLIB(self); - if (newobj->hash_state == NULL) { + if (newobj->state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); } @@ -140,7 +140,7 @@ SHA1Type_digest_impl(SHA1object *self) { unsigned char digest[SHA1_DIGESTSIZE]; ENTER_HASHLIB(self); - Hacl_Hash_SHA1_digest(self->hash_state, digest); + Hacl_Hash_SHA1_digest(self->state, digest); LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE); } @@ -157,7 +157,7 @@ SHA1Type_hexdigest_impl(SHA1object *self) { unsigned char digest[SHA1_DIGESTSIZE]; ENTER_HASHLIB(self); - Hacl_Hash_SHA1_digest(self->hash_state, digest); + Hacl_Hash_SHA1_digest(self->state, digest); LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE); } @@ -204,11 +204,11 @@ 
SHA1Type_update_impl(SHA1object *self, PyObject *obj) if (self->use_mutex) { Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); + update(self->state, buf.buf, buf.len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS } else { - update(self->hash_state, buf.buf, buf.len); + update(self->state, buf.buf, buf.len); } PyBuffer_Release(&buf); @@ -301,9 +301,9 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, return NULL; } - new->hash_state = Hacl_Hash_SHA1_malloc(); + new->state = Hacl_Hash_SHA1_malloc(); - if (new->hash_state == NULL) { + if (new->state == NULL) { Py_DECREF(new); if (string) { PyBuffer_Release(&buf); @@ -315,11 +315,11 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. */ Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); + update(new->state, buf.buf, buf.len); Py_END_ALLOW_THREADS } else { - update(new->hash_state, buf.buf, buf.len); + update(new->state, buf.buf, buf.len); } PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 76e5d69d32f77d..9de80ac16427ab 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -61,7 +61,7 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" typedef struct { PyObject_HEAD HASHLIB_MUTEX_API - Hacl_Hash_SHA3_state_t *hash_state; + Hacl_Hash_SHA3_state_t *state; } SHA3object; #define _SHA3object_CAST(op) ((SHA3object *)(op)) @@ -132,29 +132,29 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, assert(state != NULL); if (type == state->sha3_224_type) { - self->hash_state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_224); + self->state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_224); } else if (type == state->sha3_256_type) { - self->hash_state = 
Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_256); + self->state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_256); } else if (type == state->sha3_384_type) { - self->hash_state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_384); + self->state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_384); } else if (type == state->sha3_512_type) { - self->hash_state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_512); + self->state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_SHA3_512); } else if (type == state->shake_128_type) { - self->hash_state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_Shake128); + self->state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_Shake128); } else if (type == state->shake_256_type) { - self->hash_state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_Shake256); + self->state = Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_Shake256); } else { PyErr_BadInternalCall(); goto error; } - if (self->hash_state == NULL) { + if (self->state == NULL) { (void)PyErr_NoMemory(); goto error; } @@ -165,11 +165,11 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ Py_BEGIN_ALLOW_THREADS - sha3_update(self->hash_state, buf.buf, buf.len); + sha3_update(self->state, buf.buf, buf.len); Py_END_ALLOW_THREADS } else { - sha3_update(self->hash_state, buf.buf, buf.len); + sha3_update(self->state, buf.buf, buf.len); } } @@ -194,9 +194,9 @@ static int SHA3_clear(PyObject *op) { SHA3object *self = _SHA3object_CAST(op); - if (self->hash_state != NULL) { - Hacl_Hash_SHA3_free(self->hash_state); - self->hash_state = NULL; + if (self->state != NULL) { + Hacl_Hash_SHA3_free(self->state); + self->state = NULL; } return 0; } @@ -237,9 +237,9 @@ _sha3_sha3_224_copy_impl(SHA3object *self) return NULL; } ENTER_HASHLIB(self); - newobj->hash_state = Hacl_Hash_SHA3_copy(self->hash_state); + newobj->state = Hacl_Hash_SHA3_copy(self->state); LEAVE_HASHLIB(self); - if (newobj->hash_state == NULL) { + if (newobj->state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); } @@ -261,10 +261,10 @@ _sha3_sha3_224_digest_impl(SHA3object *self) // This function errors out if the algorithm is SHAKE. Here, we know this // not to be the case, and therefore do not perform error checking. 
ENTER_HASHLIB(self); - (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); + (void)Hacl_Hash_SHA3_digest(self->state, digest); LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, - Hacl_Hash_SHA3_hash_len(self->hash_state)); + Hacl_Hash_SHA3_hash_len(self->state)); } @@ -280,10 +280,10 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self) { unsigned char digest[SHA3_MAX_DIGESTSIZE]; ENTER_HASHLIB(self); - (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); + (void)Hacl_Hash_SHA3_digest(self->state, digest); LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, - Hacl_Hash_SHA3_hash_len(self->hash_state)); + Hacl_Hash_SHA3_hash_len(self->state)); } @@ -310,11 +310,11 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) if (self->use_mutex) { Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); - sha3_update(self->hash_state, buf.buf, buf.len); + sha3_update(self->state, buf.buf, buf.len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS } else { - sha3_update(self->hash_state, buf.buf, buf.len); + sha3_update(self->state, buf.buf, buf.len); } PyBuffer_Release(&buf); @@ -335,7 +335,7 @@ static PyObject * SHA3_get_block_size(PyObject *op, void *Py_UNUSED(closure)) { SHA3object *self = _SHA3object_CAST(op); - uint32_t rate = Hacl_Hash_SHA3_block_len(self->hash_state); + uint32_t rate = Hacl_Hash_SHA3_block_len(self->state); return PyLong_FromLong(rate); } @@ -372,10 +372,10 @@ SHA3_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) { // Preserving previous behavior: variable-length algorithms return 0 SHA3object *self = _SHA3object_CAST(op); - if (Hacl_Hash_SHA3_is_shake(self->hash_state)) + if (Hacl_Hash_SHA3_is_shake(self->state)) return PyLong_FromLong(0); else - return PyLong_FromLong(Hacl_Hash_SHA3_hash_len(self->hash_state)); + return PyLong_FromLong(Hacl_Hash_SHA3_hash_len(self->state)); } @@ -383,7 +383,7 @@ static PyObject * SHA3_get_capacity_bits(PyObject *op, void *Py_UNUSED(closure)) { SHA3object *self = 
_SHA3object_CAST(op); - uint32_t rate = Hacl_Hash_SHA3_block_len(self->hash_state) * 8; + uint32_t rate = Hacl_Hash_SHA3_block_len(self->state) * 8; assert(rate <= 1600); int capacity = 1600 - rate; return PyLong_FromLong(capacity); @@ -394,7 +394,7 @@ static PyObject * SHA3_get_rate_bits(PyObject *op, void *Py_UNUSED(closure)) { SHA3object *self = _SHA3object_CAST(op); - uint32_t rate = Hacl_Hash_SHA3_block_len(self->hash_state) * 8; + uint32_t rate = Hacl_Hash_SHA3_block_len(self->state) * 8; return PyLong_FromLong(rate); } @@ -491,7 +491,7 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) * - the output length is zero -- we follow the existing behavior and return * an empty digest, without raising an error */ if (digestlen > 0) { - (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, digestlen); + (void)Hacl_Hash_SHA3_squeeze(self->state, digest, digestlen); } if (hex) { result = _Py_strhex((const char *)digest, digestlen); From 5b7179672e718692ae1aa2851cbf42da5db2dfc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 12:06:25 +0200 Subject: [PATCH 11/41] use 'state' and 'get_module_state' naming --- Modules/blake2module.c | 46 +++++++++++++++++++++--------------------- Modules/md5module.c | 34 +++++++++++++++---------------- Modules/sha1module.c | 34 +++++++++++++++---------------- Modules/sha2module.c | 44 ++++++++++++++++++++-------------------- Modules/sha3module.c | 27 +++++++++++++------------ 5 files changed, 93 insertions(+), 92 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index a4fe1b90473df4..f55f9b2c8cfe82 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -77,23 +77,23 @@ typedef struct { PyTypeObject *blake2s_type; bool can_run_simd128; bool can_run_simd256; -} Blake2State; +} blake2module_state; -static inline Blake2State * -blake2_get_state(PyObject *module) +static inline blake2module_state * 
+get_blake2module_state(PyObject *module) { void *state = _PyModule_GetState(module); assert(state != NULL); - return (Blake2State *)state; + return (blake2module_state *)state; } #if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) -static inline Blake2State * +static inline blake2module_state * blake2_get_state_from_type(PyTypeObject *module) { void *state = _PyType_GetModuleState(module); assert(state != NULL); - return (Blake2State *)state; + return (blake2module_state *)state; } #endif @@ -104,7 +104,7 @@ static struct PyMethodDef blake2mod_functions[] = { static int _blake2_traverse(PyObject *module, visitproc visit, void *arg) { - Blake2State *state = blake2_get_state(module); + blake2module_state *state = get_blake2module_state(module); Py_VISIT(state->blake2b_type); Py_VISIT(state->blake2s_type); return 0; @@ -113,7 +113,7 @@ _blake2_traverse(PyObject *module, visitproc visit, void *arg) static int _blake2_clear(PyObject *module) { - Blake2State *state = blake2_get_state(module); + blake2module_state *state = get_blake2module_state(module); Py_CLEAR(state->blake2b_type); Py_CLEAR(state->blake2s_type); return 0; @@ -126,7 +126,7 @@ _blake2_free(void *module) } static void -blake2module_init_cpu_features(Blake2State *state) +blake2module_init_cpu_features(blake2module_state *state) { /* This must be kept in sync with hmacmodule_init_cpu_features() * in hmacmodule.c */ @@ -204,8 +204,8 @@ blake2module_init_cpu_features(Blake2State *state) static int blake2_exec(PyObject *m) { - Blake2State *st = blake2_get_state(m); - blake2module_init_cpu_features(st); + blake2module_state *state = get_blake2module_state(m); + blake2module_init_cpu_features(state); #define ADD_INT(DICT, NAME, VALUE) \ do { \ @@ -229,17 +229,17 @@ blake2_exec(PyObject *m) ADD_INT_CONST("_GIL_MINSIZE", HASHLIB_GIL_MINSIZE); - st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( + state->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, 
&blake2b_type_spec, NULL); - if (st->blake2b_type == NULL) { + if (state->blake2b_type == NULL) { return -1; } - if (PyModule_AddType(m, st->blake2b_type) < 0) { + if (PyModule_AddType(m, state->blake2b_type) < 0) { return -1; } - PyObject *d = st->blake2b_type->tp_dict; + PyObject *d = state->blake2b_type->tp_dict; ADD_INT(d, "SALT_SIZE", HACL_HASH_BLAKE2B_SALT_BYTES); ADD_INT(d, "PERSON_SIZE", HACL_HASH_BLAKE2B_PERSONAL_BYTES); ADD_INT(d, "MAX_KEY_SIZE", HACL_HASH_BLAKE2B_KEY_BYTES); @@ -251,17 +251,17 @@ blake2_exec(PyObject *m) ADD_INT_CONST("BLAKE2B_MAX_DIGEST_SIZE", HACL_HASH_BLAKE2B_OUT_BYTES); /* BLAKE2s */ - st->blake2s_type = (PyTypeObject *)PyType_FromModuleAndSpec( + state->blake2s_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &blake2s_type_spec, NULL); - if (st->blake2s_type == NULL) { + if (state->blake2s_type == NULL) { return -1; } - if (PyModule_AddType(m, st->blake2s_type) < 0) { + if (PyModule_AddType(m, state->blake2s_type) < 0) { return -1; } - d = st->blake2s_type->tp_dict; + d = state->blake2s_type->tp_dict; ADD_INT(d, "SALT_SIZE", HACL_HASH_BLAKE2S_SALT_BYTES); ADD_INT(d, "PERSON_SIZE", HACL_HASH_BLAKE2S_PERSONAL_BYTES); ADD_INT(d, "MAX_KEY_SIZE", HACL_HASH_BLAKE2S_KEY_BYTES); @@ -288,7 +288,7 @@ static struct PyModuleDef blake2_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "_blake2", .m_doc = blake2mod__doc__, - .m_size = sizeof(Blake2State), + .m_size = sizeof(blake2module_state), .m_methods = blake2mod_functions, .m_slots = _blake2_slots, .m_traverse = _blake2_traverse, @@ -332,18 +332,18 @@ static inline blake2_impl type_to_impl(PyTypeObject *type) { #if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) - Blake2State *st = blake2_get_state_from_type(type); + blake2module_state *state = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { #if HACL_CAN_COMPILE_SIMD256 - return st->can_run_simd256 ? Blake2b_256 : Blake2b; + return state->can_run_simd256 ? 
Blake2b_256 : Blake2b; #else return Blake2b; #endif } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { #if HACL_CAN_COMPILE_SIMD128 - return st->can_run_simd128 ? Blake2s_128 : Blake2s; + return state->can_run_simd128 ? Blake2s_128 : Blake2s; #else return Blake2s; #endif diff --git a/Modules/md5module.c b/Modules/md5module.c index 459056dc1fa4f2..e817403a849f85 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -49,21 +49,21 @@ typedef struct { typedef struct { - PyTypeObject* md5_type; -} MD5State; + PyTypeObject *md5_type; +} md5module_state; -static inline MD5State* -md5_get_state(PyObject *module) +static inline md5module_state * +get_md5module_state(PyObject *module) { void *state = PyModule_GetState(module); assert(state != NULL); - return (MD5State *)state; + return (md5module_state *)state; } static MD5object * -newMD5object(MD5State * st) +newMD5object(md5module_state *state) { - MD5object *md5 = PyObject_GC_New(MD5object, st->md5_type); + MD5object *md5 = PyObject_GC_New(MD5object, state->md5_type); if (!md5) { return NULL; } @@ -107,10 +107,10 @@ static PyObject * MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) /*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/ { - MD5State *st = PyType_GetModuleState(cls); + md5module_state *state = PyType_GetModuleState(cls); MD5object *newobj; - if ((newobj = newMD5object(st)) == NULL) { + if ((newobj = newMD5object(state)) == NULL) { return NULL; } @@ -299,8 +299,8 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } - MD5State *st = md5_get_state(module); - if ((new = newMD5object(st)) == NULL) { + md5module_state *state = get_md5module_state(module); + if ((new = newMD5object(state)) == NULL) { if (string) { PyBuffer_Release(&buf); } @@ -344,7 +344,7 @@ static struct PyMethodDef MD5_functions[] = { static int _md5_traverse(PyObject *module, visitproc visit, void *arg) { - MD5State *state = 
md5_get_state(module); + md5module_state *state = get_md5module_state(module); Py_VISIT(state->md5_type); return 0; } @@ -352,7 +352,7 @@ _md5_traverse(PyObject *module, visitproc visit, void *arg) static int _md5_clear(PyObject *module) { - MD5State *state = md5_get_state(module); + md5module_state *state = get_md5module_state(module); Py_CLEAR(state->md5_type); return 0; } @@ -367,12 +367,12 @@ _md5_free(void *module) static int md5_exec(PyObject *m) { - MD5State *st = md5_get_state(m); + md5module_state *state = get_md5module_state(m); - st->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec( + state->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &md5_type_spec, NULL); - if (PyModule_AddObjectRef(m, "MD5Type", (PyObject *)st->md5_type) < 0) { + if (PyModule_AddObjectRef(m, "MD5Type", (PyObject *)state->md5_type) < 0) { return -1; } if (PyModule_AddIntConstant(m, "_GIL_MINSIZE", HASHLIB_GIL_MINSIZE) < 0) { @@ -393,7 +393,7 @@ static PyModuleDef_Slot _md5_slots[] = { static struct PyModuleDef _md5module = { PyModuleDef_HEAD_INIT, .m_name = "_md5", - .m_size = sizeof(MD5State), + .m_size = sizeof(md5module_state), .m_methods = MD5_functions, .m_slots = _md5_slots, .m_traverse = _md5_traverse, diff --git a/Modules/sha1module.c b/Modules/sha1module.c index eb3214813033bf..2b61953578a6ba 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -49,21 +49,21 @@ typedef struct { typedef struct { - PyTypeObject* sha1_type; -} SHA1State; + PyTypeObject *sha1_type; +} sha1module_state; -static inline SHA1State* -sha1_get_state(PyObject *module) +static inline sha1module_state * +get_sha1module_state(PyObject *module) { void *state = PyModule_GetState(module); assert(state != NULL); - return (SHA1State *)state; + return (sha1module_state *)state; } static SHA1object * -newSHA1object(SHA1State *st) +newSHA1object(sha1module_state *state) { - SHA1object *sha = PyObject_GC_New(SHA1object, st->sha1_type); + SHA1object *sha = PyObject_GC_New(SHA1object, 
state->sha1_type); if (sha == NULL) { return NULL; } @@ -111,10 +111,10 @@ static PyObject * SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) /*[clinic end generated code: output=b32d4461ce8bc7a7 input=6c22e66fcc34c58e]*/ { - SHA1State *st = _PyType_GetModuleState(cls); + sha1module_state *state = _PyType_GetModuleState(cls); SHA1object *newobj; - if ((newobj = newSHA1object(st)) == NULL) { + if ((newobj = newSHA1object(state)) == NULL) { return NULL; } @@ -293,8 +293,8 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } - SHA1State *st = sha1_get_state(module); - if ((new = newSHA1object(st)) == NULL) { + sha1module_state *state = get_sha1module_state(module); + if ((new = newSHA1object(state)) == NULL) { if (string) { PyBuffer_Release(&buf); } @@ -338,7 +338,7 @@ static struct PyMethodDef SHA1_functions[] = { static int _sha1_traverse(PyObject *module, visitproc visit, void *arg) { - SHA1State *state = sha1_get_state(module); + sha1module_state *state = get_sha1module_state(module); Py_VISIT(state->sha1_type); return 0; } @@ -346,7 +346,7 @@ _sha1_traverse(PyObject *module, visitproc visit, void *arg) static int _sha1_clear(PyObject *module) { - SHA1State *state = sha1_get_state(module); + sha1module_state *state = get_sha1module_state(module); Py_CLEAR(state->sha1_type); return 0; } @@ -360,13 +360,13 @@ _sha1_free(void *module) static int _sha1_exec(PyObject *module) { - SHA1State* st = sha1_get_state(module); + sha1module_state *state = get_sha1module_state(module); - st->sha1_type = (PyTypeObject *)PyType_FromModuleAndSpec( + state->sha1_type = (PyTypeObject *)PyType_FromModuleAndSpec( module, &sha1_type_spec, NULL); if (PyModule_AddObjectRef(module, "SHA1Type", - (PyObject *)st->sha1_type) < 0) + (PyObject *)state->sha1_type) < 0) { return -1; } @@ -393,7 +393,7 @@ static PyModuleDef_Slot _sha1_slots[] = { static struct PyModuleDef _sha1module = { PyModuleDef_HEAD_INIT, .m_name = 
"_sha1", - .m_size = sizeof(SHA1State), + .m_size = sizeof(sha1module_state), .m_methods = SHA1_functions, .m_slots = _sha1_slots, .m_traverse = _sha1_traverse, diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 250c08d105b8f5..ca12135617f55c 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -71,18 +71,18 @@ typedef struct { /* We shall use run-time type information in the remainder of this module to * tell apart SHA2-224 and SHA2-256 */ typedef struct { - PyTypeObject* sha224_type; - PyTypeObject* sha256_type; - PyTypeObject* sha384_type; - PyTypeObject* sha512_type; -} sha2_state; + PyTypeObject *sha224_type; + PyTypeObject *sha256_type; + PyTypeObject *sha384_type; + PyTypeObject *sha512_type; +} sha2module_state; -static inline sha2_state* -sha2_get_state(PyObject *module) +static inline sha2module_state * +get_sha2module_state(PyObject *module) { void *state = _PyModule_GetState(module); assert(state != NULL); - return (sha2_state *)state; + return (sha2module_state *)state; } static int @@ -110,7 +110,7 @@ SHA512copy(SHA512object *src, SHA512object *dest) } static SHA256object * -newSHA224object(sha2_state *state) +newSHA224object(sha2module_state *state) { SHA256object *sha = PyObject_GC_New(SHA256object, state->sha224_type); if (!sha) { @@ -123,7 +123,7 @@ newSHA224object(sha2_state *state) } static SHA256object * -newSHA256object(sha2_state *state) +newSHA256object(sha2module_state *state) { SHA256object *sha = PyObject_GC_New(SHA256object, state->sha256_type); if (!sha) { @@ -136,7 +136,7 @@ newSHA256object(sha2_state *state) } static SHA512object * -newSHA384object(sha2_state *state) +newSHA384object(sha2module_state *state) { SHA512object *sha = PyObject_GC_New(SHA512object, state->sha384_type); if (!sha) { @@ -149,7 +149,7 @@ newSHA384object(sha2_state *state) } static SHA512object * -newSHA512object(sha2_state *state) +newSHA512object(sha2module_state *state) { SHA512object *sha = PyObject_GC_New(SHA512object, 
state->sha512_type); if (!sha) { @@ -256,7 +256,7 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls) { int rc; SHA256object *newobj; - sha2_state *state = _PyType_GetModuleState(cls); + sha2module_state *state = _PyType_GetModuleState(cls); if (Py_IS_TYPE(self, state->sha256_type)) { if ((newobj = newSHA256object(state)) == NULL) { return NULL; @@ -292,7 +292,7 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls) { int rc; SHA512object *newobj; - sha2_state *state = _PyType_GetModuleState(cls); + sha2module_state *state = _PyType_GetModuleState(cls); if (Py_IS_TYPE((PyObject*)self, state->sha512_type)) { if ((newobj = newSHA512object(state)) == NULL) { @@ -613,7 +613,7 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } - sha2_state *state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); SHA256object *new; if ((new = newSHA256object(state)) == NULL) { @@ -676,7 +676,7 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } - sha2_state *state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); SHA256object *new; if ((new = newSHA224object(state)) == NULL) { if (string) { @@ -735,7 +735,7 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, return NULL; } - sha2_state *state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); @@ -798,7 +798,7 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, return NULL; } - sha2_state *state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); @@ -851,7 +851,7 @@ static struct PyMethodDef SHA2_functions[] = { static int _sha2_traverse(PyObject *module, visitproc visit, void *arg) { - sha2_state 
*state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); Py_VISIT(state->sha224_type); Py_VISIT(state->sha256_type); Py_VISIT(state->sha384_type); @@ -862,7 +862,7 @@ _sha2_traverse(PyObject *module, visitproc visit, void *arg) static int _sha2_clear(PyObject *module) { - sha2_state *state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); Py_CLEAR(state->sha224_type); Py_CLEAR(state->sha256_type); Py_CLEAR(state->sha384_type); @@ -879,7 +879,7 @@ _sha2_free(void *module) /* Initialize this module. */ static int sha2_exec(PyObject *module) { - sha2_state *state = sha2_get_state(module); + sha2module_state *state = get_sha2module_state(module); state->sha224_type = (PyTypeObject *)PyType_FromModuleAndSpec( module, &sha224_type_spec, NULL); @@ -935,7 +935,7 @@ static PyModuleDef_Slot _sha2_slots[] = { static struct PyModuleDef _sha2module = { PyModuleDef_HEAD_INIT, .m_name = "_sha2", - .m_size = sizeof(sha2_state), + .m_size = sizeof(sha2module_state), .m_methods = SHA2_functions, .m_slots = _sha2_slots, .m_traverse = _sha2_traverse, diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 9de80ac16427ab..84f3742d96f972 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -31,16 +31,17 @@ typedef struct { PyTypeObject *sha3_256_type; PyTypeObject *sha3_384_type; PyTypeObject *sha3_512_type; + PyTypeObject *shake_128_type; PyTypeObject *shake_256_type; -} SHA3State; +} sha3module_state; -static inline SHA3State* -sha3_get_state(PyObject *module) +static inline sha3module_state * +get_sha3module_state(PyObject *module) { void *state = PyModule_GetState(module); assert(state != NULL); - return (SHA3State *)state; + return (sha3module_state *)state; } /*[clinic input] @@ -123,7 +124,7 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, } Py_buffer buf = {NULL, NULL}; - SHA3State *state = _PyType_GetModuleState(type); + sha3module_state *state = 
_PyType_GetModuleState(type); SHA3object *self = newSHA3object(type); if (self == NULL) { goto error; @@ -345,7 +346,7 @@ SHA3_get_name(PyObject *self, void *Py_UNUSED(closure)) { PyTypeObject *type = Py_TYPE(self); - SHA3State *state = _PyType_GetModuleState(type); + sha3module_state *state = _PyType_GetModuleState(type); assert(state != NULL); if (type == state->sha3_224_type) { @@ -588,7 +589,7 @@ SHA3_TYPE_SPEC(SHAKE256_spec, "shake_256", SHAKE256slots); static int _sha3_traverse(PyObject *module, visitproc visit, void *arg) { - SHA3State *state = sha3_get_state(module); + sha3module_state *state = get_sha3module_state(module); Py_VISIT(state->sha3_224_type); Py_VISIT(state->sha3_256_type); Py_VISIT(state->sha3_384_type); @@ -601,7 +602,7 @@ _sha3_traverse(PyObject *module, visitproc visit, void *arg) static int _sha3_clear(PyObject *module) { - SHA3State *state = sha3_get_state(module); + sha3module_state *state = get_sha3module_state(module); Py_CLEAR(state->sha3_224_type); Py_CLEAR(state->sha3_256_type); Py_CLEAR(state->sha3_384_type); @@ -620,16 +621,16 @@ _sha3_free(void *module) static int _sha3_exec(PyObject *m) { - SHA3State *st = sha3_get_state(m); + sha3module_state *state = get_sha3module_state(m); #define init_sha3type(type, typespec) \ do { \ - st->type = (PyTypeObject *)PyType_FromModuleAndSpec( \ + state->type = (PyTypeObject *)PyType_FromModuleAndSpec( \ m, &typespec, NULL); \ - if (st->type == NULL) { \ + if (state->type == NULL) { \ return -1; \ } \ - if (PyModule_AddType(m, st->type) < 0) { \ + if (PyModule_AddType(m, state->type) < 0) { \ return -1; \ } \ } while(0) @@ -663,7 +664,7 @@ static PyModuleDef_Slot _sha3_slots[] = { static struct PyModuleDef _sha3module = { PyModuleDef_HEAD_INIT, .m_name = "_sha3", - .m_size = sizeof(SHA3State), + .m_size = sizeof(sha3module_state), .m_slots = _sha3_slots, .m_traverse = _sha3_traverse, .m_clear = _sha3_clear, From 85ae8c34f4a1609008e993e41d57288cd5a8472c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 12:14:08 +0200 Subject: [PATCH 12/41] use consistent 'module_{exec,traverse,clear,free}' naming --- Modules/blake2module.c | 18 +++++++++--------- Modules/md5module.c | 18 +++++++++--------- Modules/sha1module.c | 18 +++++++++--------- Modules/sha2module.c | 19 ++++++++++--------- Modules/sha3module.c | 18 +++++++++--------- 5 files changed, 46 insertions(+), 45 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index f55f9b2c8cfe82..9f84c3fbccdfe1 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -102,7 +102,7 @@ static struct PyMethodDef blake2mod_functions[] = { }; static int -_blake2_traverse(PyObject *module, visitproc visit, void *arg) +blake2module_traverse(PyObject *module, visitproc visit, void *arg) { blake2module_state *state = get_blake2module_state(module); Py_VISIT(state->blake2b_type); @@ -111,7 +111,7 @@ _blake2_traverse(PyObject *module, visitproc visit, void *arg) } static int -_blake2_clear(PyObject *module) +blake2module_clear(PyObject *module) { blake2module_state *state = get_blake2module_state(module); Py_CLEAR(state->blake2b_type); @@ -120,9 +120,9 @@ _blake2_clear(PyObject *module) } static void -_blake2_free(void *module) +blake2module_free(void *module) { - (void)_blake2_clear((PyObject *)module); + (void)blake2module_clear((PyObject *)module); } static void @@ -202,7 +202,7 @@ blake2module_init_cpu_features(blake2module_state *state) } static int -blake2_exec(PyObject *m) +blake2module_exec(PyObject *m) { blake2module_state *state = get_blake2module_state(m); blake2module_init_cpu_features(state); @@ -278,7 +278,7 @@ blake2_exec(PyObject *m) } static PyModuleDef_Slot _blake2_slots[] = { - {Py_mod_exec, blake2_exec}, + {Py_mod_exec, blake2module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} @@ -291,9 +291,9 @@ static 
struct PyModuleDef blake2_module = { .m_size = sizeof(blake2module_state), .m_methods = blake2mod_functions, .m_slots = _blake2_slots, - .m_traverse = _blake2_traverse, - .m_clear = _blake2_clear, - .m_free = _blake2_free, + .m_traverse = blake2module_traverse, + .m_clear = blake2module_clear, + .m_free = blake2module_free, }; PyMODINIT_FUNC diff --git a/Modules/md5module.c b/Modules/md5module.c index e817403a849f85..539e9e4da6e5b6 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -342,7 +342,7 @@ static struct PyMethodDef MD5_functions[] = { }; static int -_md5_traverse(PyObject *module, visitproc visit, void *arg) +md5module_traverse(PyObject *module, visitproc visit, void *arg) { md5module_state *state = get_md5module_state(module); Py_VISIT(state->md5_type); @@ -350,7 +350,7 @@ _md5_traverse(PyObject *module, visitproc visit, void *arg) } static int -_md5_clear(PyObject *module) +md5module_clear(PyObject *module) { md5module_state *state = get_md5module_state(module); Py_CLEAR(state->md5_type); @@ -358,14 +358,14 @@ _md5_clear(PyObject *module) } static void -_md5_free(void *module) +md5module_free(void *module) { - _md5_clear((PyObject *)module); + (void)md5module_clear((PyObject *)module); } /* Initialize this module. 
*/ static int -md5_exec(PyObject *m) +md5module_exec(PyObject *m) { md5module_state *state = get_md5module_state(m); @@ -383,7 +383,7 @@ md5_exec(PyObject *m) } static PyModuleDef_Slot _md5_slots[] = { - {Py_mod_exec, md5_exec}, + {Py_mod_exec, md5module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} @@ -396,9 +396,9 @@ static struct PyModuleDef _md5module = { .m_size = sizeof(md5module_state), .m_methods = MD5_functions, .m_slots = _md5_slots, - .m_traverse = _md5_traverse, - .m_clear = _md5_clear, - .m_free = _md5_free, + .m_traverse = md5module_traverse, + .m_clear = md5module_clear, + .m_free = md5module_free, }; PyMODINIT_FUNC diff --git a/Modules/sha1module.c b/Modules/sha1module.c index 2b61953578a6ba..450d2c64ce5ee5 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -336,7 +336,7 @@ static struct PyMethodDef SHA1_functions[] = { }; static int -_sha1_traverse(PyObject *module, visitproc visit, void *arg) +sha1module_traverse(PyObject *module, visitproc visit, void *arg) { sha1module_state *state = get_sha1module_state(module); Py_VISIT(state->sha1_type); @@ -344,7 +344,7 @@ _sha1_traverse(PyObject *module, visitproc visit, void *arg) } static int -_sha1_clear(PyObject *module) +sha1module_clear(PyObject *module) { sha1module_state *state = get_sha1module_state(module); Py_CLEAR(state->sha1_type); @@ -352,13 +352,13 @@ _sha1_clear(PyObject *module) } static void -_sha1_free(void *module) +sha1module_free(void *module) { - (void)_sha1_clear((PyObject *)module); + (void)sha1module_clear((PyObject *)module); } static int -_sha1_exec(PyObject *module) +sha1module_exec(PyObject *module) { sha1module_state *state = get_sha1module_state(module); @@ -384,7 +384,7 @@ _sha1_exec(PyObject *module) /* Initialize this module. 
*/ static PyModuleDef_Slot _sha1_slots[] = { - {Py_mod_exec, _sha1_exec}, + {Py_mod_exec, sha1module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} @@ -396,9 +396,9 @@ static struct PyModuleDef _sha1module = { .m_size = sizeof(sha1module_state), .m_methods = SHA1_functions, .m_slots = _sha1_slots, - .m_traverse = _sha1_traverse, - .m_clear = _sha1_clear, - .m_free = _sha1_free + .m_traverse = sha1module_traverse, + .m_clear = sha1module_clear, + .m_free = sha1module_free }; PyMODINIT_FUNC diff --git a/Modules/sha2module.c b/Modules/sha2module.c index ca12135617f55c..66014dc20a6452 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -849,7 +849,7 @@ static struct PyMethodDef SHA2_functions[] = { }; static int -_sha2_traverse(PyObject *module, visitproc visit, void *arg) +sha2module_traverse(PyObject *module, visitproc visit, void *arg) { sha2module_state *state = get_sha2module_state(module); Py_VISIT(state->sha224_type); @@ -860,7 +860,7 @@ _sha2_traverse(PyObject *module, visitproc visit, void *arg) } static int -_sha2_clear(PyObject *module) +sha2module_clear(PyObject *module) { sha2module_state *state = get_sha2module_state(module); Py_CLEAR(state->sha224_type); @@ -871,13 +871,14 @@ _sha2_clear(PyObject *module) } static void -_sha2_free(void *module) +sha2module_free(void *module) { - (void)_sha2_clear((PyObject *)module); + (void)sha2module_clear((PyObject *)module); } /* Initialize this module. 
*/ -static int sha2_exec(PyObject *module) +static int +sha2module_exec(PyObject *module) { sha2module_state *state = get_sha2module_state(module); @@ -926,7 +927,7 @@ static int sha2_exec(PyObject *module) } static PyModuleDef_Slot _sha2_slots[] = { - {Py_mod_exec, sha2_exec}, + {Py_mod_exec, sha2module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} @@ -938,9 +939,9 @@ static struct PyModuleDef _sha2module = { .m_size = sizeof(sha2module_state), .m_methods = SHA2_functions, .m_slots = _sha2_slots, - .m_traverse = _sha2_traverse, - .m_clear = _sha2_clear, - .m_free = _sha2_free + .m_traverse = sha2module_traverse, + .m_clear = sha2module_clear, + .m_free = sha2module_free }; PyMODINIT_FUNC diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 84f3742d96f972..20510add09b8d4 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -587,7 +587,7 @@ SHA3_TYPE_SPEC(SHAKE256_spec, "shake_256", SHAKE256slots); static int -_sha3_traverse(PyObject *module, visitproc visit, void *arg) +sha3module_traverse(PyObject *module, visitproc visit, void *arg) { sha3module_state *state = get_sha3module_state(module); Py_VISIT(state->sha3_224_type); @@ -600,7 +600,7 @@ _sha3_traverse(PyObject *module, visitproc visit, void *arg) } static int -_sha3_clear(PyObject *module) +sha3module_clear(PyObject *module) { sha3module_state *state = get_sha3module_state(module); Py_CLEAR(state->sha3_224_type); @@ -613,13 +613,13 @@ _sha3_clear(PyObject *module) } static void -_sha3_free(void *module) +sha3module_free(void *module) { - (void)_sha3_clear((PyObject *)module); + (void)sha3module_clear((PyObject *)module); } static int -_sha3_exec(PyObject *m) +sha3module_exec(PyObject *m) { sha3module_state *state = get_sha3module_state(m); @@ -654,7 +654,7 @@ _sha3_exec(PyObject *m) } static PyModuleDef_Slot _sha3_slots[] = { - {Py_mod_exec, _sha3_exec}, + {Py_mod_exec, sha3module_exec}, 
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} @@ -666,9 +666,9 @@ static struct PyModuleDef _sha3module = { .m_name = "_sha3", .m_size = sizeof(sha3module_state), .m_slots = _sha3_slots, - .m_traverse = _sha3_traverse, - .m_clear = _sha3_clear, - .m_free = _sha3_free, + .m_traverse = sha3module_traverse, + .m_clear = sha3module_clear, + .m_free = sha3module_free, }; From 64e1513e3d71fd08bede77c3c7a883508c89c573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 12:18:14 +0200 Subject: [PATCH 13/41] use consistent 'module_{methods,slots,def}' naming --- Modules/blake2module.c | 19 +++++++------------ Modules/hmacmodule.c | 4 ++-- Modules/md5module.c | 12 ++++++------ Modules/sha1module.c | 12 ++++++------ Modules/sha2module.c | 12 ++++++------ Modules/sha3module.c | 8 ++++---- 6 files changed, 31 insertions(+), 36 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 9f84c3fbccdfe1..396135263ac80e 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -69,7 +69,7 @@ static PyType_Spec blake2b_type_spec; static PyType_Spec blake2s_type_spec; -PyDoc_STRVAR(blake2mod__doc__, +PyDoc_STRVAR(blake2module__doc__, "_blake2 provides BLAKE2b and BLAKE2s for hashlib\n"); typedef struct { @@ -97,10 +97,6 @@ blake2_get_state_from_type(PyTypeObject *module) } #endif -static struct PyMethodDef blake2mod_functions[] = { - {NULL, NULL} -}; - static int blake2module_traverse(PyObject *module, visitproc visit, void *arg) { @@ -277,20 +273,19 @@ blake2module_exec(PyObject *m) return 0; } -static PyModuleDef_Slot _blake2_slots[] = { +static PyModuleDef_Slot blake2module_slots[] = { {Py_mod_exec, blake2module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; -static struct PyModuleDef blake2_module = { - .m_base = 
PyModuleDef_HEAD_INIT, +static struct PyModuleDef blake2module_def = { + PyModuleDef_HEAD_INIT, .m_name = "_blake2", - .m_doc = blake2mod__doc__, + .m_doc = blake2module__doc__, .m_size = sizeof(blake2module_state), - .m_methods = blake2mod_functions, - .m_slots = _blake2_slots, + .m_slots = blake2module_slots, .m_traverse = blake2module_traverse, .m_clear = blake2module_clear, .m_free = blake2module_free, @@ -299,7 +294,7 @@ static struct PyModuleDef blake2_module = { PyMODINIT_FUNC PyInit__blake2(void) { - return PyModuleDef_Init(&blake2_module); + return PyModuleDef_Init(&blake2module_def); } // IMPLEMENTATION OF METHODS diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 361129a406fe39..30cc9db5540d82 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -1839,7 +1839,7 @@ static struct PyModuleDef_Slot hmacmodule_slots[] = { {0, NULL} /* sentinel */ }; -static struct PyModuleDef _hmacmodule = { +static struct PyModuleDef hmacmodule_def = { PyModuleDef_HEAD_INIT, .m_name = "_hmac", .m_size = sizeof(hmacmodule_state), @@ -1853,5 +1853,5 @@ static struct PyModuleDef _hmacmodule = { PyMODINIT_FUNC PyInit__hmac(void) { - return PyModuleDef_Init(&_hmacmodule); + return PyModuleDef_Init(&hmacmodule_def); } diff --git a/Modules/md5module.c b/Modules/md5module.c index 539e9e4da6e5b6..2c4bc970f0defd 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -336,7 +336,7 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, /* List of functions exported by this module */ -static struct PyMethodDef MD5_functions[] = { +static struct PyMethodDef md5module_methods[] = { _MD5_MD5_METHODDEF {NULL, NULL} /* Sentinel */ }; @@ -382,7 +382,7 @@ md5module_exec(PyObject *m) return 0; } -static PyModuleDef_Slot _md5_slots[] = { +static PyModuleDef_Slot md5module_slots[] = { {Py_mod_exec, md5module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, @@ -390,12 +390,12 @@ static 
PyModuleDef_Slot _md5_slots[] = { }; -static struct PyModuleDef _md5module = { +static struct PyModuleDef md5module_def = { PyModuleDef_HEAD_INIT, .m_name = "_md5", .m_size = sizeof(md5module_state), - .m_methods = MD5_functions, - .m_slots = _md5_slots, + .m_methods = md5module_methods, + .m_slots = md5module_slots, .m_traverse = md5module_traverse, .m_clear = md5module_clear, .m_free = md5module_free, @@ -404,5 +404,5 @@ static struct PyModuleDef _md5module = { PyMODINIT_FUNC PyInit__md5(void) { - return PyModuleDef_Init(&_md5module); + return PyModuleDef_Init(&md5module_def); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index 450d2c64ce5ee5..db95f059f49798 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -330,7 +330,7 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, /* List of functions exported by this module */ -static struct PyMethodDef SHA1_functions[] = { +static struct PyMethodDef sha1module_methods[] = { _SHA1_SHA1_METHODDEF {NULL, NULL} /* Sentinel */ }; @@ -383,19 +383,19 @@ sha1module_exec(PyObject *module) /* Initialize this module. 
*/ -static PyModuleDef_Slot _sha1_slots[] = { +static PyModuleDef_Slot sha1module_slots[] = { {Py_mod_exec, sha1module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; -static struct PyModuleDef _sha1module = { +static struct PyModuleDef sha1module_def = { PyModuleDef_HEAD_INIT, .m_name = "_sha1", .m_size = sizeof(sha1module_state), - .m_methods = SHA1_functions, - .m_slots = _sha1_slots, + .m_methods = sha1module_methods, + .m_slots = sha1module_slots, .m_traverse = sha1module_traverse, .m_clear = sha1module_clear, .m_free = sha1module_free @@ -404,5 +404,5 @@ static struct PyModuleDef _sha1module = { PyMODINIT_FUNC PyInit__sha1(void) { - return PyModuleDef_Init(&_sha1module); + return PyModuleDef_Init(&sha1module_def); } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 66014dc20a6452..6ab53a96989c41 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -840,7 +840,7 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, /* List of functions exported by this module */ -static struct PyMethodDef SHA2_functions[] = { +static struct PyMethodDef sha2module_methods[] = { _SHA2_SHA256_METHODDEF _SHA2_SHA224_METHODDEF _SHA2_SHA512_METHODDEF @@ -926,19 +926,19 @@ sha2module_exec(PyObject *module) return 0; } -static PyModuleDef_Slot _sha2_slots[] = { +static PyModuleDef_Slot sha2module_slots[] = { {Py_mod_exec, sha2module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; -static struct PyModuleDef _sha2module = { +static struct PyModuleDef sha2module_def = { PyModuleDef_HEAD_INIT, .m_name = "_sha2", .m_size = sizeof(sha2module_state), - .m_methods = SHA2_functions, - .m_slots = _sha2_slots, + .m_methods = sha2module_methods, + .m_slots = sha2module_slots, .m_traverse = sha2module_traverse, .m_clear = sha2module_clear, .m_free = sha2module_free @@ -947,5 +947,5 @@ static struct 
PyModuleDef _sha2module = { PyMODINIT_FUNC PyInit__sha2(void) { - return PyModuleDef_Init(&_sha2module); + return PyModuleDef_Init(&sha2module_def); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 20510add09b8d4..2414fecdf07159 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -653,7 +653,7 @@ sha3module_exec(PyObject *m) return 0; } -static PyModuleDef_Slot _sha3_slots[] = { +static PyModuleDef_Slot sha3module_slots[] = { {Py_mod_exec, sha3module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, @@ -661,11 +661,11 @@ static PyModuleDef_Slot _sha3_slots[] = { }; /* Initialize this module. */ -static struct PyModuleDef _sha3module = { +static struct PyModuleDef sha3module_def = { PyModuleDef_HEAD_INIT, .m_name = "_sha3", .m_size = sizeof(sha3module_state), - .m_slots = _sha3_slots, + .m_slots = sha3module_slots, .m_traverse = sha3module_traverse, .m_clear = sha3module_clear, .m_free = sha3module_free, @@ -675,5 +675,5 @@ static struct PyModuleDef _sha3module = { PyMODINIT_FUNC PyInit__sha3(void) { - return PyModuleDef_Init(&_sha3module); + return PyModuleDef_Init(&sha3module_def); } From abfa1afdca38b49095211f10843046af14a8cff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 13:11:15 +0200 Subject: [PATCH 14/41] update clinic directives --- Modules/blake2module.c | 108 ++++++++++++++++++++++------------------- Modules/hmacmodule.c | 2 +- Modules/md5module.c | 53 ++++++++++++-------- Modules/sha1module.c | 50 ++++++++++++------- Modules/sha2module.c | 67 ++++++++++++++----------- Modules/sha3module.c | 44 +++++++++++------ 6 files changed, 194 insertions(+), 130 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 396135263ac80e..b0877fadc4f99a 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -2,6 +2,7 @@ * Written in 2013 by Dmitry Chestnykh * 
Modified for CPython by Christian Heimes * Updated to use HACL* by Jonathan Protzenko + * Refactored by Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * * To the extent possible under law, the author have dedicated all * copyright and related and neighboring rights to this software to @@ -64,13 +65,7 @@ #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif -// MODULE TYPE SLOTS - -static PyType_Spec blake2b_type_spec; -static PyType_Spec blake2s_type_spec; - -PyDoc_STRVAR(blake2module__doc__, - "_blake2 provides BLAKE2b and BLAKE2s for hashlib\n"); +// --- BLAKE-2 module state --------------------------------------------------- typedef struct { PyTypeObject *blake2b_type; @@ -87,15 +82,63 @@ get_blake2module_state(PyObject *module) return (blake2module_state *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) static inline blake2module_state * -blake2_get_state_from_type(PyTypeObject *module) +get_blake2module_state_by_cls(PyTypeObject *cls) { - void *state = _PyType_GetModuleState(module); + void *state = _PyType_GetModuleState(cls); assert(state != NULL); return (blake2module_state *)state; } + +// --- BLAKE-2 object --------------------------------------------------------- + +// The HACL* API does not offer an agile API that can deal with either Blake2S +// or Blake2B -- the reason is that the underlying states are optimized (uint32s +// for S, uint64s for B). Therefore, we use a tagged union in this module to +// correctly dispatch. Note that the previous incarnation of this code +// transformed the Blake2b implementation into the Blake2s one using a script, +// so this is an improvement. +// +// The 128 and 256 versions are only available if i) we were able to compile +// them, and ii) if the CPU we run on also happens to have the right instruction +// set. 
+typedef enum { Blake2s, Blake2b, Blake2s_128, Blake2b_256 } blake2_impl; + +typedef struct { + PyObject_HEAD + HASHLIB_MUTEX_API + union { + Hacl_Hash_Blake2s_state_t *blake2s_state; + Hacl_Hash_Blake2b_state_t *blake2b_state; +#if HACL_CAN_COMPILE_SIMD128 + Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif +#if HACL_CAN_COMPILE_SIMD256 + Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; +#endif + }; + blake2_impl impl; +} Blake2Object; + +#define _Blake2Object_CAST(op) ((Blake2Object *)(op)) + +// --- BLAKE-2 module clinic configuration ------------------------------------ + +/*[clinic input] +module _blake2 +class _blake2.blake2b "Blake2Object *" "clinic_state()->blake2b_type" +class _blake2.blake2s "Blake2Object *" "clinic_state()->blake2s_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7e2b2b3b67a72f18]*/ + +#define clinic_state() (get_blake2module_state_by_cls(Py_TYPE(self))) +#include "clinic/blake2module.c.h" +#undef clinic_state + +// MODULE TYPE SLOTS + +static PyType_Spec blake2b_type_spec; +static PyType_Spec blake2s_type_spec; static int blake2module_traverse(PyObject *module, visitproc visit, void *arg) @@ -280,6 +323,9 @@ static PyModuleDef_Slot blake2module_slots[] = { {0, NULL} }; +PyDoc_STRVAR(blake2module__doc__, + "_blake2 provides BLAKE2b and BLAKE2s for hashlib\n"); + static struct PyModuleDef blake2module_def = { PyModuleDef_HEAD_INIT, .m_name = "_blake2", @@ -299,18 +345,6 @@ PyInit__blake2(void) // IMPLEMENTATION OF METHODS -// The HACL* API does not offer an agile API that can deal with either Blake2S -// or Blake2B -- the reason is that the underlying states are optimized (uint32s -// for S, uint64s for B). Therefore, we use a tagged union in this module to -// correctly dispatch. Note that the previous incarnation of this code -// transformed the Blake2b implementation into the Blake2s one using a script, -// so this is an improvement. 
-// -// The 128 and 256 versions are only available if i) we were able to compile -// them, and ii) if the CPU we run on also happens to have the right instruction -// set. -typedef enum { Blake2s, Blake2b, Blake2s_128, Blake2b_256 } blake2_impl; - static inline bool is_blake2b(blake2_impl impl) { @@ -327,7 +361,7 @@ static inline blake2_impl type_to_impl(PyTypeObject *type) { #if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) - blake2module_state *state = blake2_get_state_from_type(type); + blake2module_state *state = get_blake2module_state_by_cls(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { #if HACL_CAN_COMPILE_SIMD256 @@ -346,34 +380,6 @@ type_to_impl(PyTypeObject *type) Py_UNREACHABLE(); } -typedef struct { - PyObject_HEAD - HASHLIB_MUTEX_API - union { - Hacl_Hash_Blake2s_state_t *blake2s_state; - Hacl_Hash_Blake2b_state_t *blake2b_state; -#if HACL_CAN_COMPILE_SIMD128 - Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; -#endif -#if HACL_CAN_COMPILE_SIMD256 - Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; -#endif - }; - blake2_impl impl; -} Blake2Object; - -#define _Blake2Object_CAST(op) ((Blake2Object *)(op)) - -#include "clinic/blake2module.c.h" - -/*[clinic input] -module _blake2 -class _blake2.blake2b "Blake2Object *" "&PyBlake2_BLAKE2bType" -class _blake2.blake2s "Blake2Object *" "&PyBlake2_BLAKE2sType" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b7526666bd18af83]*/ - - static Blake2Object * new_Blake2Object(PyTypeObject *type) { diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 30cc9db5540d82..d789b976a1abfd 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -377,7 +377,7 @@ get_hmacmodule_state_by_cls(PyTypeObject *cls) return (hmacmodule_state *)state; } -// --- HMAC Object ------------------------------------------------------------ +// --- HMAC object ------------------------------------------------------------ typedef 
Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; diff --git a/Modules/md5module.c b/Modules/md5module.c index 2c4bc970f0defd..cec29c1472900c 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -8,6 +8,7 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. @@ -23,30 +24,14 @@ #include "Python.h" #include "hashlib.h" -/*[clinic input] -module _md5 -class MD5Type "MD5object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ +#include "_hacl/Hacl_Hash_MD5.h" /* The MD5 block size and message digest sizes, in bytes */ #define MD5_BLOCKSIZE 64 #define MD5_DIGESTSIZE 16 -#include "_hacl/Hacl_Hash_MD5.h" - - -typedef struct { - PyObject_HEAD - HASHLIB_MUTEX_API - Hacl_Hash_MD5_state_t *state; -} MD5object; - -#define _MD5object_CAST(op) ((MD5object *)(op)) - -#include "clinic/md5module.c.h" - +// --- MD5 module state ------------------------------------------------------- typedef struct { PyTypeObject *md5_type; @@ -60,6 +45,36 @@ get_md5module_state(PyObject *module) return (md5module_state *)state; } +static inline md5module_state * +get_md5module_state_by_cls(PyTypeObject *cls) +{ + void *state = PyType_GetModuleState(cls); + assert(state != NULL); + return (md5module_state *)state; +} + +// --- MD5 object ------------------------------------------------------------- + +typedef struct { + PyObject_HEAD + HASHLIB_MUTEX_API + Hacl_Hash_MD5_state_t *state; +} MD5object; + +#define _MD5object_CAST(op) ((MD5object *)(op)) + +// --- MD5 module clinic configuration ---------------------------------------- + +/*[clinic input] +module _md5 +class MD5Type "MD5object *" "clinic_state()->md5_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d 
input=b5451859a6c7e20d]*/ + +#define clinic_state() (get_md5module_state_by_cls(Py_TYPE(self))) +#include "clinic/md5module.c.h" +#undef clinic_state + static MD5object * newMD5object(md5module_state *state) { @@ -107,7 +122,7 @@ static PyObject * MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) /*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/ { - md5module_state *state = PyType_GetModuleState(cls); + md5module_state *state = get_md5module_state_by_cls(cls); MD5object *newobj; if ((newobj = newMD5object(state)) == NULL) { diff --git a/Modules/sha1module.c b/Modules/sha1module.c index db95f059f49798..d72365f898fa13 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -8,6 +8,7 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. @@ -24,12 +25,6 @@ #include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" // _PyType_GetModuleState() -/*[clinic input] -module _sha1 -class SHA1Type "SHA1object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ - /* The SHA1 block size and message digest sizes, in bytes */ #define SHA1_BLOCKSIZE 64 @@ -37,16 +32,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" #include "_hacl/Hacl_Hash_SHA1.h" -typedef struct { - PyObject_HEAD - HASHLIB_MUTEX_API - Hacl_Hash_SHA1_state_t *state; -} SHA1object; - -#define _SHA1object_CAST(op) ((SHA1object *)(op)) - -#include "clinic/sha1module.c.h" - +// --- SHA-1 module state ----------------------------------------------------- typedef struct { PyTypeObject *sha1_type; @@ -60,6 +46,36 @@ get_sha1module_state(PyObject *module) return (sha1module_state *)state; } +static inline sha1module_state * +get_sha1module_state_by_cls(PyTypeObject *cls) +{ + void *state 
= PyType_GetModuleState(cls); + assert(state != NULL); + return (sha1module_state *)state; +} + +// --- SHA-1 object ----------------------------------------------------------- + +typedef struct { + PyObject_HEAD + HASHLIB_MUTEX_API + Hacl_Hash_SHA1_state_t *state; +} SHA1object; + +#define _SHA1object_CAST(op) ((SHA1object *)(op)) + +// --- SHA-1 module clinic configuration -------------------------------------- + +/*[clinic input] +module _sha1 +class SHA1Type "SHA1object *" "clinic_state()->sha1_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=afc62adaf06c713f]*/ + +#define clinic_state() (get_sha1module_state_by_cls(Py_TYPE(self))) +#include "clinic/sha1module.c.h" +#undef clinic_state + static SHA1object * newSHA1object(sha1module_state *state) { @@ -111,7 +127,7 @@ static PyObject * SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) /*[clinic end generated code: output=b32d4461ce8bc7a7 input=6c22e66fcc34c58e]*/ { - sha1module_state *state = _PyType_GetModuleState(cls); + sha1module_state *state = get_sha1module_state_by_cls(cls); SHA1object *newobj; if ((newobj = newSHA1object(state)) == NULL) { diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 6ab53a96989c41..670844b23060e1 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -9,6 +9,7 @@ Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) Jonathan Protzenko (jonathan@protzenko.fr) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. 
@@ -28,14 +29,9 @@ #include "hashlib.h" -/*[clinic input] -module _sha2 -class SHA256Type "SHA256object *" "&PyType_Type" -class SHA512Type "SHA512object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ - +#include "_hacl/Hacl_Hash_SHA2.h" +// TODO: Get rid of int digestsize in favor of Hacl state info? /* The SHA block sizes and maximum message digest sizes, in bytes */ #define SHA256_BLOCKSIZE 64 @@ -43,11 +39,34 @@ class SHA512Type "SHA512object *" "&PyType_Type" #define SHA512_BLOCKSIZE 128 #define SHA512_DIGESTSIZE 64 -/* Our SHA2 implementations defer to the HACL* verified library. */ +// --- SHA-2 module state ----------------------------------------------------- -#include "_hacl/Hacl_Hash_SHA2.h" +/* We shall use run-time type information in the remainder of this module to + * tell apart SHA2-224 and SHA2-256 */ +typedef struct { + PyTypeObject *sha224_type; + PyTypeObject *sha256_type; + PyTypeObject *sha384_type; + PyTypeObject *sha512_type; +} sha2module_state; -// TODO: Get rid of int digestsize in favor of Hacl state info? 
+static inline sha2module_state * +get_sha2module_state(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (sha2module_state *)state; +} + +static inline sha2module_state * +get_sha2module_state_by_cls(PyTypeObject *cls) +{ + void *state = PyType_GetModuleState(cls); + assert(state != NULL); + return (sha2module_state *)state; +} + +// --- SHA-2 object ----------------------------------------------------------- typedef struct { PyObject_HEAD @@ -66,24 +85,18 @@ typedef struct { #define _SHA256object_CAST(op) ((SHA256object *)(op)) #define _SHA512object_CAST(op) ((SHA512object *)(op)) -#include "clinic/sha2module.c.h" +// --- SHA-2 module clinic configuration -------------------------------------- -/* We shall use run-time type information in the remainder of this module to - * tell apart SHA2-224 and SHA2-256 */ -typedef struct { - PyTypeObject *sha224_type; - PyTypeObject *sha256_type; - PyTypeObject *sha384_type; - PyTypeObject *sha512_type; -} sha2module_state; +/*[clinic input] +module _sha2 +class SHA256Type "SHA256object *" "clinic_state()->sha256_type" +class SHA512Type "SHA512object *" "clinic_state()->sha512_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e758ed2b54d457ea]*/ -static inline sha2module_state * -get_sha2module_state(PyObject *module) -{ - void *state = _PyModule_GetState(module); - assert(state != NULL); - return (sha2module_state *)state; -} +#define clinic_state() (get_sha2module_state_by_cls(Py_TYPE(self))) +#include "clinic/sha2module.c.h" +#undef clinic_state static int SHA256copy(SHA256object *src, SHA256object *dest) @@ -256,7 +269,7 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls) { int rc; SHA256object *newobj; - sha2module_state *state = _PyType_GetModuleState(cls); + sha2module_state *state = get_sha2module_state_by_cls(cls); if (Py_IS_TYPE(self, state->sha256_type)) { if ((newobj = newSHA256object(state)) == NULL) { 
return NULL; diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 2414fecdf07159..9a7115843c0d28 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -9,6 +9,7 @@ * Greg Stein (gstein@lyra.org) * Trevor Perrin (trevp@trevp.net) * Gregory P. Smith (greg@krypto.org) + * Bénédikt Tran (10796600+picnixz@users.noreply.github.com) * * Copyright (C) 2012-2022 Christian Heimes (christian@python.org) * Licensed to PSF under a Contributor Agreement. @@ -24,8 +25,12 @@ #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "hashlib.h" +#include "_hacl/Hacl_Hash_SHA3.h" + #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */ +// --- SHA-3 module state ----------------------------------------------------- + typedef struct { PyTypeObject *sha3_224_type; PyTypeObject *sha3_256_type; @@ -44,20 +49,15 @@ get_sha3module_state(PyObject *module) return (sha3module_state *)state; } -/*[clinic input] -module _sha3 -class _sha3.sha3_224 "SHA3object *" "&SHA3_224typ" -class _sha3.sha3_256 "SHA3object *" "&SHA3_256typ" -class _sha3.sha3_384 "SHA3object *" "&SHA3_384typ" -class _sha3.sha3_512 "SHA3object *" "&SHA3_512typ" -class _sha3.shake_128 "SHA3object *" "&SHAKE128type" -class _sha3.shake_256 "SHA3object *" "&SHAKE256type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b8a53680f370285a]*/ - -/* The structure for storing SHA3 info */ +static inline sha3module_state * +get_sha3module_state_by_cls(PyTypeObject *cls) +{ + void *state = PyType_GetModuleState(cls); + assert(state != NULL); + return (sha3module_state *)state; +} -#include "_hacl/Hacl_Hash_SHA3.h" +// --- SHA-3 object ----------------------------------------------------------- typedef struct { PyObject_HEAD @@ -67,7 +67,22 @@ typedef struct { #define _SHA3object_CAST(op) ((SHA3object *)(op)) +// --- SHA-3 module clinic configuration -------------------------------------- + +/*[clinic input] +module _sha3 +class _sha3.sha3_224 
"SHA3object *" "clinic_state()->sha3_224_type" +class _sha3.sha3_256 "SHA3object *" "clinic_state()->sha3_256_type" +class _sha3.sha3_384 "SHA3object *" "clinic_state()->sha3_384_type" +class _sha3.sha3_512 "SHA3object *" "clinic_state()->sha3_512_type" +class _sha3.shake_128 "SHA3object *" "clinic_state()->shake_128_type" +class _sha3.shake_256 "SHA3object *" "clinic_state()->shake_256_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=83376ec869f33016]*/ + +#define clinic_state() (get_sha3module_state_by_cls(Py_TYPE(self))) #include "clinic/sha3module.c.h" +#undef clinic_state static SHA3object * newSHA3object(PyTypeObject *type) @@ -346,8 +361,7 @@ SHA3_get_name(PyObject *self, void *Py_UNUSED(closure)) { PyTypeObject *type = Py_TYPE(self); - sha3module_state *state = _PyType_GetModuleState(type); - assert(state != NULL); + sha3module_state *state = get_sha3module_state_by_cls(type); if (type == state->sha3_224_type) { return PyUnicode_FromString("sha3_224"); From 902759fb854c40aeefb56f2d274ca7004a8ec476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 12:15:23 +0200 Subject: [PATCH 15/41] unconditionally lock when performing HASH updates --- Lib/test/support/hashlib_helper.py | 19 -- Lib/test/test_hashlib.py | 36 +--- Lib/test/test_hmac.py | 24 --- Modules/_hashopenssl.c | 92 +++------ Modules/blake2module.c | 47 ++--- Modules/hashlib.h | 39 +--- Modules/hmacmodule.c | 307 ++++++++--------------------- Modules/md5module.c | 89 +++------ Modules/sha1module.c | 52 ++--- Modules/sha2module.c | 125 ++++-------- Modules/sha3module.c | 42 ++-- 11 files changed, 228 insertions(+), 644 deletions(-) diff --git a/Lib/test/support/hashlib_helper.py b/Lib/test/support/hashlib_helper.py index 7032257b06877a..c0d5da042d8bcf 100644 --- a/Lib/test/support/hashlib_helper.py +++ b/Lib/test/support/hashlib_helper.py @@ -308,22 +308,3 @@ def 
sha3_384(self): @property def sha3_512(self): return self._find_constructor_in("_sha3","sha3_512") - - -def find_gil_minsize(modules_names, default=2048): - """Get the largest GIL_MINSIZE value for the given cryptographic modules. - - The valid module names are the following: - - - _hashlib - - _md5, _sha1, _sha2, _sha3, _blake2 - - _hmac - """ - sizes = [] - for module_name in modules_names: - try: - module = importlib.import_module(module_name) - except ImportError: - continue - sizes.append(getattr(module, '_GIL_MINSIZE', default)) - return max(sizes, default=default) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index b83ae181718b7a..a9469504b504ed 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -409,7 +409,7 @@ def test_large_update(self): aas = b'a' * 128 bees = b'b' * 127 cees = b'c' * 126 - dees = b'd' * 2048 # HASHLIB_GIL_MINSIZE + dees = b'd' * 2048 for cons in self.hash_constructors: m1 = cons(usedforsecurity=False) @@ -990,40 +990,6 @@ def test_case_shake256_vector(self): for msg, md in read_vectors('shake_256'): self.check('shake_256', msg, md, True) - def test_gil(self): - # Check things work fine with an input larger than the size required - # for multithreaded operation. Currently, all cryptographic modules - # have the same constant value (2048) but in the future it might not - # be the case. 
- mods = ['_md5', '_sha1', '_sha2', '_sha3', '_blake2', '_hashlib'] - gil_minsize = hashlib_helper.find_gil_minsize(mods) - for cons in self.hash_constructors: - # constructors belong to one of the above modules - m = cons(usedforsecurity=False) - m.update(b'1') - m.update(b'#' * gil_minsize) - m.update(b'1') - - m = cons(b'x' * gil_minsize, usedforsecurity=False) - m.update(b'1') - - def test_sha256_gil(self): - gil_minsize = hashlib_helper.find_gil_minsize(['_sha2', '_hashlib']) - m = hashlib.sha256() - m.update(b'1') - m.update(b'#' * gil_minsize) - m.update(b'1') - self.assertEqual( - m.hexdigest(), - '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' - ) - - m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1') - self.assertEqual( - m.hexdigest(), - '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' - ) - @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_threaded_hashing(self): diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py index ff6e1bce0ef801..d1f4662adbb618 100644 --- a/Lib/test/test_hmac.py +++ b/Lib/test/test_hmac.py @@ -1133,11 +1133,6 @@ def HMAC(self, key, msg=None): """Create a HMAC object.""" raise NotImplementedError - @property - def gil_minsize(self): - """Get the maximal input length for the GIL to be held.""" - raise NotImplementedError - def check_update(self, key, chunks): chunks = list(chunks) msg = b''.join(chunks) @@ -1155,13 +1150,6 @@ def test_update(self): with self.subTest(key=key, msg=msg): self.check_update(key, [msg]) - def test_update_large(self): - gil_minsize = self.gil_minsize - key = random.randbytes(16) - top = random.randbytes(gil_minsize + 1) - bot = random.randbytes(gil_minsize + 1) - self.check_update(key, [top, bot]) - def test_update_exceptions(self): h = self.HMAC(b"key") for msg in ['invalid msg', 123, (), []]: @@ -1175,10 +1163,6 @@ class PyUpdateTestCase(PyModuleMixin, UpdateTestCaseMixin, unittest.TestCase): def HMAC(self, key, msg=None): 
return self.hmac.HMAC(key, msg, digestmod='sha256') - @property - def gil_minsize(self): - return sha2._GIL_MINSIZE - @hashlib_helper.requires_openssl_hashdigest('sha256') class OpenSSLUpdateTestCase(UpdateTestCaseMixin, unittest.TestCase): @@ -1186,10 +1170,6 @@ class OpenSSLUpdateTestCase(UpdateTestCaseMixin, unittest.TestCase): def HMAC(self, key, msg=None): return _hashlib.hmac_new(key, msg, digestmod='sha256') - @property - def gil_minsize(self): - return _hashlib._GIL_MINSIZE - class BuiltinUpdateTestCase(BuiltinModuleMixin, UpdateTestCaseMixin, unittest.TestCase): @@ -1199,10 +1179,6 @@ def HMAC(self, key, msg=None): # are still built, making it possible to use SHA-2 hashes. return self.hmac.new(key, msg, digestmod='sha256') - @property - def gil_minsize(self): - return self.hmac._GIL_MINSIZE - class CopyBaseTestCase: diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 50cf3c57491049..d4c7cc02ccbea3 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -611,9 +611,9 @@ static int _hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p) { int result; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); result = EVP_MD_CTX_copy(new_ctx_p, self->ctx); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return result; } @@ -733,29 +733,16 @@ static PyObject * _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj) /*[clinic end generated code: output=62ad989754946b86 input=aa1ce20e3f92ceb6]*/ { - int result; + int rc; Py_buffer view; - GET_BUFFER_VIEW_OR_ERROUT(obj, &view); - - if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - result = _hashlib_HASH_hash(self, view.buf, view.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - result = _hashlib_HASH_hash(self, view.buf, view.len); - } - + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + rc = _hashlib_HASH_hash(self, 
view.buf, view.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&view); - - if (result == -1) - return NULL; - Py_RETURN_NONE; + return rc < 0 ? NULL : Py_None; } static PyMethodDef HASH_methods[] = { @@ -1060,15 +1047,11 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, } if (view.buf && view.len) { - if (view.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - result = _hashlib_HASH_hash(self, view.buf, view.len); - Py_END_ALLOW_THREADS - } else { + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS result = _hashlib_HASH_hash(self, view.buf, view.len); - } + Py_END_ALLOW_THREADS if (result == -1) { assert(PyErr_Occurred()); Py_CLEAR(self); @@ -1701,7 +1684,7 @@ _hashlib_hmac_new_impl(PyObject *module, Py_buffer *key, PyObject *msg_obj, HASHLIB_INIT_MUTEX(self); if ((msg_obj != NULL) && (msg_obj != Py_None)) { - if (!_hmac_update(self, msg_obj)) { + if (_hmac_update(self, msg_obj) < 0) { goto error; } } @@ -1718,9 +1701,9 @@ static int locked_HMAC_CTX_copy(HMAC_CTX *new_ctx_p, HMACobject *self) { int result; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); result = HMAC_CTX_copy(new_ctx_p, self->ctx); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return result; } @@ -1743,35 +1726,26 @@ _hashlib_hmac_digest_size(HMACobject *self) static int _hmac_update(HMACobject *self, PyObject *obj) { - int r; + int r = 1; Py_buffer view = {0}; - GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0); - - if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { + GET_BUFFER_VIEW_OR_ERROR(obj, &view, return -1); + if (view.len > 0) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - r = HMAC_Update(self->ctx, - (const unsigned char 
*)view.buf, - (size_t)view.len); - PyMutex_Unlock(&self->mutex); + HASHLIB_ACQUIRE_LOCK(self); + r = HMAC_Update(self->ctx, + (const unsigned char *)view.buf, + (size_t)view.len); + HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS - } else { - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); } - PyBuffer_Release(&view); if (r == 0) { notify_ssl_error_occurred(); - return 0; + return -1; } - return 1; + return 0; } /*[clinic input] @@ -1845,7 +1819,7 @@ static PyObject * _hashlib_HMAC_update_impl(HMACobject *self, PyObject *msg) /*[clinic end generated code: output=f31f0ace8c625b00 input=1829173bb3cfd4e6]*/ { - if (!_hmac_update(self, msg)) { + if (_hmac_update(self, msg) < 0) { return NULL; } Py_RETURN_NONE; @@ -2412,17 +2386,6 @@ hashlib_exception(PyObject *module) return 0; } -static int -hashlib_constants(PyObject *module) -{ - if (PyModule_AddIntConstant(module, "_GIL_MINSIZE", - HASHLIB_GIL_MINSIZE) < 0) - { - return -1; - } - return 0; -} - static PyModuleDef_Slot hashlib_slots[] = { {Py_mod_exec, hashlib_init_hashtable}, {Py_mod_exec, hashlib_init_HASH_type}, @@ -2431,7 +2394,6 @@ static PyModuleDef_Slot hashlib_slots[] = { {Py_mod_exec, hashlib_md_meth_names}, {Py_mod_exec, hashlib_init_constructors}, {Py_mod_exec, hashlib_exception}, - {Py_mod_exec, hashlib_constants}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 07aa89f573f05f..5f02344ab6e285 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -229,8 +229,6 @@ blake2_exec(PyObject *m) // good a place as any to probe the CPU flags. 
detect_cpu_features(&st->flags); - ADD_INT_CONST("_GIL_MINSIZE", HASHLIB_GIL_MINSIZE); - st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &blake2b_type_spec, NULL); @@ -422,7 +420,7 @@ new_Blake2Object(PyTypeObject *type) } while (0) static void -update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) +blake2_update_state_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { // These need to be ifdef'd out otherwise it's an unresolved symbol at @@ -634,15 +632,11 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /* Process initial data if any. */ if (data != NULL) { GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - - if (buf.len >= HASHLIB_GIL_MINSIZE) { + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - update(self, buf.buf, buf.len); + blake2_update_state_unlocked(self, buf.buf, buf.len); Py_END_ALLOW_THREADS } - else { - update(self, buf.buf, buf.len); - } PyBuffer_Release(&buf); } @@ -793,9 +787,9 @@ _blake2_blake2b_copy_impl(Blake2Object *self) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = blake2_blake2b_copy_locked(self, cpy); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(cpy); return NULL; @@ -817,24 +811,15 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) /*[clinic end generated code: output=99330230068e8c99 input=ffc4aa6a6a225d31]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); + HASHLIB_ACQUIRE_LOCK(self); + blake2_update_state_unlocked(self, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS - } else { - update(self, buf.buf, buf.len); } - PyBuffer_Release(&buf); - Py_RETURN_NONE; } @@ -849,9 +834,9 @@ 
_blake2_blake2b_digest_impl(Blake2Object *self) /*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/ { uint8_t digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - - ENTER_HASHLIB(self); uint8_t digest_length = 0; + + HASHLIB_ACQUIRE_LOCK(self); switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 case Blake2b_256: @@ -870,9 +855,10 @@ _blake2_blake2b_digest_impl(Blake2Object *self) digest_length = Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); break; default: + HASHLIB_RELEASE_LOCK(self); Py_UNREACHABLE(); } - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, digest_length); } @@ -887,9 +873,9 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) /*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/ { uint8_t digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - - ENTER_HASHLIB(self); uint8_t digest_length = 0; + + HASHLIB_ACQUIRE_LOCK(self); switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 case Blake2b_256: @@ -908,9 +894,10 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) digest_length = Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); break; default: + HASHLIB_RELEASE_LOCK(self); Py_UNREACHABLE(); } - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, digest_length); } diff --git a/Modules/hashlib.h b/Modules/hashlib.h index fe12acb27ce036..43d53442666fb7 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -50,48 +50,17 @@ #include "pythread.h" -#define HASHLIB_LOCK_HEAD \ - /* - * Attributes to prevent undefined behaviors - * via multiple threads entering the C API. - */ \ - bool use_mutex; \ +#define HASHLIB_LOCK_HEAD \ + /* Guard against race conditions during incremental update(). 
*/ \ PyMutex mutex; -#define HASHLIB_SET_MUTEX_POLICY(OBJ, VALUE) \ - _Py_atomic_store_int_relaxed((int *)&(OBJ)->use_mutex, (int)(VALUE)) +#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex) +#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex) -#define ENTER_HASHLIB(OBJ) \ - do { \ - if (_Py_atomic_load_int_relaxed((const int *)&(OBJ)->use_mutex)) { \ - PyMutex_Lock(&(OBJ)->mutex); \ - } \ - } while (0) - -#define LEAVE_HASHLIB(OBJ) \ - do { \ - if (_Py_atomic_load_int_relaxed((const int *)&(OBJ)->use_mutex)) { \ - PyMutex_Unlock(&(OBJ)->mutex); \ - } \ - } while (0) - -#ifdef Py_GIL_DISABLED #define HASHLIB_INIT_MUTEX(OBJ) \ do { \ (OBJ)->mutex = (PyMutex){0}; \ - (OBJ)->use_mutex = true; \ } while (0) -#else -#define HASHLIB_INIT_MUTEX(OBJ) \ - do { \ - (OBJ)->mutex = (PyMutex){0}; \ - (OBJ)->use_mutex = false; \ - } while (0) -#endif - -/* TODO(gpshead): We should make this a module or class attribute - * to allow the user to optimize based on the platform they're using. */ -#define HASHLIB_GIL_MINSIZE 2048 static inline int _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index c7b49d4dee3d0a..36744afea6476e 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -215,105 +215,6 @@ typedef struct py_hmac_hacl_api { #define Py_CHECK_HACL_UINT32_T_LENGTH(LEN) #endif -/* - * Call the HACL* HMAC-HASH update function on the given data. - * - * The magnitude of 'LEN' is not checked and thus 'LEN' must be - * safely convertible to a uint32_t value. - */ -#define Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN) \ - Hacl_Streaming_HMAC_update(HACL_STATE, BUF, (uint32_t)(LEN)) - -/* - * Call the HACL* HMAC-HASH update function on the given data. - * - * On DEBUG builds, the 'ERRACTION' statements are executed if - * the update() call returned a non-successful HACL* exit code. - * - * The buffer 'BUF' and its length 'LEN' are left untouched. 
- * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, uint32_t, PyObject *, (C statements)) - */ -#ifndef NDEBUG -#define Py_HMAC_HACL_UPDATE_ONCE( \ - HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ -) \ - do { \ - Py_CHECK_HACL_UINT32_T_LENGTH(LEN); \ - hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \ - if (_hacl_convert_errno(code, (ALGORITHM)) < 0) { \ - ERRACTION; \ - } \ - } while (0) -#else -#define Py_HMAC_HACL_UPDATE_ONCE( \ - HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ -) \ - do { \ - (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN)); \ - } while (0) -#endif - -/* - * Repetivively call the HACL* HMAC-HASH update function on the given - * data until the buffer length 'LEN' is strictly less than UINT32_MAX. - * - * On builds with PY_SSIZE_T_MAX <= UINT32_MAX, this is a no-op. - * - * The buffer 'BUF' (resp. 'LEN') is advanced (resp. decremented) - * by UINT32_MAX after each update. On DEBUG builds, each update() - * call is verified and the 'ERRACTION' statements are executed if - * a non-successful HACL* exit code is being returned. - * - * In particular, 'BUF' and 'LEN' must be variable names and not - * expressions on their own. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) - */ -#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 -#define Py_HMAC_HACL_UPDATE_LOOP( \ - HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ -) \ - do { \ - while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) { \ - Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX, \ - ALGORITHM, ERRACTION); \ - BUF += UINT32_MAX; \ - LEN -= UINT32_MAX; \ - } \ - } while (0) -#else -#define Py_HMAC_HACL_UPDATE_LOOP( \ - HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ -) -#endif - -/* - * Perform the HMAC-HASH update() operation in a streaming fashion. 
- * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) - */ -#define Py_HMAC_HACL_UPDATE( \ - HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ -) \ - do { \ - Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ - Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ - } while (0) - /* * HMAC underlying hash function static information. */ @@ -491,17 +392,14 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) * Otherwise, this sets an appropriate exception and returns -1. */ static int -_hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) +_hacl_convert_errno(hacl_errno_t code) { switch (code) { case Hacl_Streaming_Types_Success: { return 0; } case Hacl_Streaming_Types_InvalidAlgorithm: { - // only makes sense if an algorithm is known at call time - assert(algorithm != NULL); - assert(PyUnicode_CheckExact(algorithm)); - PyErr_Format(PyExc_ValueError, "invalid algorithm: %U", algorithm); + PyErr_Format(PyExc_ValueError, "invalid HACL* algorithm"); return -1; } case Hacl_Streaming_Types_InvalidLength: { @@ -536,7 +434,7 @@ _hacl_hmac_state_new(HMAC_Hash_Kind kind, uint8_t *key, uint32_t len) assert(kind != Py_hmac_kind_hash_unknown); HACL_HMAC_state *state = NULL; hacl_errno_t retcode = Hacl_Streaming_HMAC_malloc_(kind, key, len, &state); - if (_hacl_convert_errno(retcode, NULL) < 0) { + if (_hacl_convert_errno(retcode) < 0) { assert(state == NULL); return NULL; } @@ -554,13 +452,60 @@ _hacl_hmac_state_free(HACL_HMAC_state *state) } } +/* + * Call the HACL* HMAC-HASH update function on the given data. + * + * On DEBUG builds, the update() call is verified. + * + * Return 0 on success; otherwise, set an exception and return -1 on failure. 
+*/ +static int +_hacl_hmac_state_update_once(HACL_HMAC_state *state, + uint8_t *buf, Py_ssize_t len) +{ + assert(len >= 0); +#ifndef NDEBUG + Py_CHECK_HACL_UINT32_T_LENGTH(len); + hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, (uint32_t)len); + return _hacl_convert_errno(code); +#else + (void)Hacl_Streaming_HMAC_update(state, buf, (uint32_t)len); + return 0; +#endif +} + +/* + * Perform the HMAC-HASH update() operation in a streaming fashion. + * + * On DEBUG builds, each update() call is verified. + * + * Return 0 on success; otherwise, set an exception and return -1 on failure. + */ +static int +_hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) +{ + assert(len >= 0); +#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 + while (len > UINT32_MAX_AS_SSIZE_T) { + if (_hacl_hmac_state_update_once(state, buf, UINT32_MAX)) { + assert(PyErr_Occurred()); + return -1; + } + buf += UINT32_MAX; + len -= UINT32_MAX; + } +#endif + assert(len <= UINT32_MAX_AS_SSIZE_T); + return _hacl_hmac_state_update_once(state, buf, len); +} + /* Static information used to construct the hash table. 
*/ static const py_hmac_hinfo py_hmac_static_hinfo[] = { -#define Py_HMAC_HINFO_HACL_API(HACL_HID) \ - { \ - /* one-shot helpers */ \ - .compute = &Py_hmac_## HACL_HID ##_compute_func, \ - .compute_py = &_hmac_compute_## HACL_HID ##_impl, \ +#define Py_HMAC_HINFO_HACL_API(HACL_HID) \ + { \ + /* one-shot helpers */ \ + .compute = &Py_hmac_## HACL_HID ##_compute_func, \ + .compute_py = &_hmac_compute_## HACL_HID ##_impl, \ } #define Py_HMAC_HINFO_ENTRY(HACL_HID, HLIB_NAME) \ @@ -798,29 +743,16 @@ hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) { assert(self->name != NULL); assert(self->state != NULL); - if (len == 0) { - // do nothing if the buffer is empty - return 0; - } - - if (len < HASHLIB_GIL_MINSIZE) { - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, return -1); - return 0; + assert(len >= 0); + int rc = 0; + if (len > 0) { + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + rc = _hacl_hmac_state_update(self->state, msg, len); + Py_END_ALLOW_THREADS } - - int res = 0; - Py_BEGIN_ALLOW_THREADS - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, goto error); - goto done; -#ifndef NDEBUG -error: - res = -1; -#else - Py_UNREACHABLE(); -#endif -done: - Py_END_ALLOW_THREADS - return res; + return rc; } /*[clinic input] @@ -946,12 +878,13 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + int rc = 0; + HASHLIB_ACQUIRE_LOCK(self); /* copy hash information */ hmac_copy_hinfo(copy, self); /* copy internal state */ - int rc = hmac_copy_state(copy, self); - LEAVE_HASHLIB(self); + rc = hmac_copy_state(copy, self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(copy); @@ -963,78 +896,6 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return (PyObject *)copy; } -/* - * Update the HMAC object with the given buffer. 
- * - * This unconditionally acquires the lock on the HMAC object. - * - * On DEBUG builds, each update() call is verified. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - int res = 0; - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); - goto done; -#ifndef NDEBUG -error: - res = -1; -#else - Py_UNREACHABLE(); -#endif -done: - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - return res; -} - -/* - * Update the HMAC object with the given buffer. - * - * This conditionally acquires the lock on the HMAC object. - * - * On DEBUG builds, each update() call is verified. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - ENTER_HASHLIB(self); // conditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); - LEAVE_HASHLIB(self); - return 0; - -#ifndef NDEBUG -error: - LEAVE_HASHLIB(self); - return -1; -#else - Py_UNREACHABLE(); -#endif -} - -/* - * Update the internal HMAC state with the given buffer. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static inline int -hmac_update_state(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - assert(buf != 0); - assert(len >= 0); - return len == 0 - ? 0 /* nothing to do */ - : len < HASHLIB_GIL_MINSIZE - ? 
hmac_update_state_cond_lock(self, buf, len) - : hmac_update_state_with_lock(self, buf, len); -} - /*[clinic input] _hmac.HMAC.update @@ -1047,9 +908,16 @@ static PyObject * _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) /*[clinic end generated code: output=962134ada5e55985 input=7c0ea830efb03367]*/ { + int rc = 0; Py_buffer msg; GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg); - int rc = hmac_update_state(self, msg.buf, msg.len); + if (msg.len > 0) { + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS + } PyBuffer_Release(&msg); return rc < 0 ? NULL : Py_None; } @@ -1069,14 +937,14 @@ hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) { assert(digest != NULL); hacl_errno_t rc; - ENTER_HASHLIB(self); // conditionally acquire a lock + HASHLIB_ACQUIRE_LOCK(self); rc = Hacl_Streaming_HMAC_digest(self->state, digest, self->digest_size); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); assert( rc == Hacl_Streaming_Types_Success || rc == Hacl_Streaming_Types_OutOfMemory ); - return _hacl_convert_errno(rc, NULL); + return _hacl_convert_errno(rc); } /*[clinic input] @@ -1692,20 +1560,6 @@ hmacmodule_init_strings(hmacmodule_state *state) return 0; } -static int -hmacmodule_init_globals(PyObject *module, hmacmodule_state *state) -{ -#define ADD_INT_CONST(NAME, VALUE) \ - do { \ - if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ - return -1; \ - } \ - } while (0) - ADD_INT_CONST("_GIL_MINSIZE", HASHLIB_GIL_MINSIZE); -#undef ADD_INT_CONST - return 0; -} - static void hmacmodule_init_cpu_features(hmacmodule_state *state) { @@ -1796,9 +1650,6 @@ hmacmodule_exec(PyObject *module) if (hmacmodule_init_strings(state) < 0) { return -1; } - if (hmacmodule_init_globals(module, state) < 0) { - return -1; - } hmacmodule_init_cpu_features(state); return 0; } diff --git a/Modules/md5module.c b/Modules/md5module.c index 
a05fd9d591fdac..9aba1851dc1f4b 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -115,9 +115,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(self); return PyErr_NoMemory(); @@ -126,11 +126,11 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) } static void -md5_digest_compute_cond_lock(MD5object *self, uint8_t *digest) +md5_digest_compute_with_lock(MD5object *self, uint8_t *digest) { - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); } /*[clinic input] @@ -144,7 +144,7 @@ MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { uint8_t digest[MD5_DIGESTSIZE]; - md5_digest_compute_cond_lock(self, digest); + md5_digest_compute_with_lock(self, digest); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -159,18 +159,20 @@ MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { uint8_t digest[MD5_DIGESTSIZE]; - md5_digest_compute_cond_lock(self, digest); + md5_digest_compute_with_lock(self, digest); return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); } static void -_hacl_md5_update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) +_hacl_md5_state_update(Hacl_Hash_MD5_state_t *state, + uint8_t *buf, Py_ssize_t len) { + assert(len >= 0); /* - * Note: we explicitly ignore the error code on the basis that it would - * take more than 1 billion years to overflow the maximum admissible length - * for MD5 (2^61 - 1). 
- */ + * Note: we explicitly ignore the error code on the basis that it would + * take more than 1 billion years to overflow the maximum admissible length + * for MD5 (2^61 - 1). + */ #if PY_SSIZE_T_MAX > UINT32_MAX while (len > UINT32_MAX) { (void)Hacl_Hash_MD5_update(state, buf, UINT32_MAX); @@ -182,42 +184,6 @@ _hacl_md5_update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) (void)Hacl_Hash_MD5_update(state, buf, (uint32_t)len); } -static void -md5_update_state_with_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) -{ - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - _hacl_md5_update(self->hash_state, buf, len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS -} - -static void -md5_update_state_cond_lock(MD5object *self, uint8_t *buf, Py_ssize_t len) -{ - ENTER_HASHLIB(self); // conditionally acquire a lock - _hacl_md5_update(self->hash_state, buf, len); - LEAVE_HASHLIB(self); -} - -static inline void -md5_update_state(MD5object *self, uint8_t *buf, Py_ssize_t len) -{ - assert(buf != 0); - assert(len >= 0); - if (len == 0) { - return; - } - if (len < HASHLIB_GIL_MINSIZE) { - md5_update_state_cond_lock(self, buf, len); - } - else { - HASHLIB_SET_MUTEX_POLICY(self, 1); - md5_update_state_with_lock(self, buf, len); - HASHLIB_SET_MUTEX_POLICY(self, 0); - } -} - /*[clinic input] MD5Type.update @@ -232,9 +198,14 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) /*[clinic end generated code: output=b0fed9a7ce7ad253 input=6e1efcd9ecf17032]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - md5_update_state(self, buf.buf, buf.len); + if (buf.len > 0) { + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + _hacl_md5_state_update(self->hash_state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS + } PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -336,16 +307,11 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } if (string) { - if (buf.len >= 
HASHLIB_GIL_MINSIZE) { - /* Do not use self->mutex here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - _hacl_md5_update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - _hacl_md5_update(new->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + _hacl_md5_state_update(new->hash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -394,9 +360,6 @@ md5_exec(PyObject *m) if (PyModule_AddObjectRef(m, "MD5Type", (PyObject *)st->md5_type) < 0) { return -1; } - if (PyModule_AddIntConstant(m, "_GIL_MINSIZE", HASHLIB_GIL_MINSIZE) < 0) { - return -1; - } return 0; } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index a746bf74f8d4c1..a95a9af5fe848c 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -121,9 +121,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_SHA1_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -142,9 +142,9 @@ SHA1Type_digest_impl(SHA1object *self) /*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/ { unsigned char digest[SHA1_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA1_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE); } @@ -159,14 +159,15 @@ SHA1Type_hexdigest_impl(SHA1object *self) /*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/ { unsigned char digest[SHA1_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA1_digest(self->hash_state, digest); 
- LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE); } static void -update(Hacl_Hash_SHA1_state_t *state, uint8_t *buf, Py_ssize_t len) +_hacl_sha1_state_update(Hacl_Hash_SHA1_state_t *state, + uint8_t *buf, Py_ssize_t len) { /* * Note: we explicitly ignore the error code on the basis that it would @@ -198,22 +199,14 @@ SHA1Type_update_impl(SHA1object *self, PyObject *obj) /*[clinic end generated code: output=cdc8e0e106dbec5f input=aad8e07812edbba3]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); + HASHLIB_ACQUIRE_LOCK(self); + _hacl_sha1_state_update(self->hash_state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS - } else { - update(self->hash_state, buf.buf, buf.len); } - PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -314,16 +307,11 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(new->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. 
*/ + Py_BEGIN_ALLOW_THREADS + _hacl_sha1_state_update(new->hash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -373,12 +361,6 @@ _sha1_exec(PyObject *module) { return -1; } - if (PyModule_AddIntConstant(module, - "_GIL_MINSIZE", - HASHLIB_GIL_MINSIZE) < 0) - { - return -1; - } return 0; } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 72931910c5d720..1595fb9eca56e4 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -206,7 +206,8 @@ SHA512_dealloc(PyObject *op) * 64 bits so we loop in <4gig chunks when needed. */ static void -update_256(Hacl_Hash_SHA2_state_t_256 *state, uint8_t *buf, Py_ssize_t len) +_hacl_sha2_state_update_256(Hacl_Hash_SHA2_state_t_256 *state, + uint8_t *buf, Py_ssize_t len) { /* * Note: we explicitly ignore the error code on the basis that it would @@ -225,7 +226,8 @@ update_256(Hacl_Hash_SHA2_state_t_256 *state, uint8_t *buf, Py_ssize_t len) } static void -update_512(Hacl_Hash_SHA2_state_t_512 *state, uint8_t *buf, Py_ssize_t len) +_hacl_sha2_state_update_512(Hacl_Hash_SHA2_state_t_512 *state, + uint8_t *buf, Py_ssize_t len) { /* * Note: we explicitly ignore the error code on the basis that it would @@ -272,9 +274,9 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls) } } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = SHA256copy(self, newobj); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(newobj); return NULL; @@ -309,9 +311,9 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls) } } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = SHA512copy(self, newobj); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(newobj); return NULL; @@ -331,11 +333,11 @@ SHA256Type_digest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); // HACL* performs copies under the hood so that self->state 
remains valid // after this call. Hacl_Hash_SHA2_digest_256(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -351,11 +353,11 @@ SHA512Type_digest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Hash_SHA2_digest_512(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -371,9 +373,9 @@ SHA256Type_hexdigest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA2_digest_256(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -389,9 +391,9 @@ SHA512Type_hexdigest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA2_digest_512(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -409,22 +411,14 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj) /*[clinic end generated code: output=dc58a580cf8905a5 input=b2d449d5b30f0f5a]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update_256(self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); + HASHLIB_ACQUIRE_LOCK(self); + _hacl_sha2_state_update_256(self->state, buf.buf, buf.len); + 
HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS - } else { - update_256(self->state, buf.buf, buf.len); } - PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -443,22 +437,14 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj) /*[clinic end generated code: output=9af211766c0b7365 input=ded2b46656566283]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update_512(self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); + HASHLIB_ACQUIRE_LOCK(self); + _hacl_sha2_state_update_512(self->state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS - } else { - update_512(self->state, buf.buf, buf.len); } - PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -638,16 +624,11 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_256(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + _hacl_sha2_state_update_256(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -700,16 +681,11 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_256(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + _hacl_sha2_state_update_256(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -763,16 +739,11 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_512(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + _hacl_sha2_state_update_512(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -826,16 +797,13 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. 
*/ + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - update_512(new->state, buf.buf, buf.len); + _hacl_sha2_state_update_512(new->state, buf.buf, buf.len); Py_END_ALLOW_THREADS } - else { - update_512(new->state, buf.buf, buf.len); - } PyBuffer_Release(&buf); } @@ -919,13 +887,6 @@ static int sha2_exec(PyObject *module) return -1; } - if (PyModule_AddIntConstant(module, - "_GIL_MINSIZE", - HASHLIB_GIL_MINSIZE) < 0) - { - return -1; - } - return 0; } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index cfbf0cbcc042c5..b66af6dbcd1c5e 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -163,16 +163,13 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, if (data) { GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - sha3_update(self->hash_state, buf.buf, buf.len); + sha3_update(self->hash_state, buf.buf, buf.len); Py_END_ALLOW_THREADS } - else { - sha3_update(self->hash_state, buf.buf, buf.len); - } } PyBuffer_Release(&buf); @@ -238,9 +235,9 @@ _sha3_sha3_224_copy_impl(SHA3object *self) if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_SHA3_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -262,9 +259,9 @@ _sha3_sha3_224_digest_impl(SHA3object *self) unsigned char digest[SHA3_MAX_DIGESTSIZE]; // This function errors out if the algorithm is SHAKE. Here, we know this // not to be the case, and therefore do not perform error checking. 
- ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, Hacl_Hash_SHA3_hash_len(self->hash_state)); } @@ -281,9 +278,9 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self) /*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/ { unsigned char digest[SHA3_MAX_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, Hacl_Hash_SHA3_hash_len(self->hash_state)); } @@ -303,22 +300,14 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) /*[clinic end generated code: output=390b7abf7c9795a5 input=a887f54dcc4ae227]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { + if (buf.len > 0) { Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - sha3_update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); + HASHLIB_ACQUIRE_LOCK(self); + sha3_update(self->hash_state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS - } else { - sha3_update(self->hash_state, buf.buf, buf.len); } - PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -647,9 +636,6 @@ _sha3_exec(PyObject *m) if (PyModule_AddStringConstant(m, "implementation", "HACL") < 0) { return -1; } - if (PyModule_AddIntConstant(m, "_GIL_MINSIZE", HASHLIB_GIL_MINSIZE) < 0) { - return -1; - } return 0; } From 05c1e6631268b3e04d853ef74213b1dbbe2420e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 12:26:51 +0200 Subject: [PATCH 16/41] post-merge --- Modules/blake2module.c | 4 +--- Modules/hashlib.h | 3 ++- Modules/hmacmodule.c | 9 ++------- 
Modules/md5module.c | 4 +--- Modules/sha1module.c | 6 +----- Modules/sha2module.c | 10 ++-------- Modules/sha3module.c | 5 +---- 7 files changed, 10 insertions(+), 31 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 23487fcbd07be4..0d0fd3881d6687 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -350,7 +350,7 @@ type_to_impl(PyTypeObject *type) } typedef struct { - PyObject_HEAD + PyObject_HASHLIB_HEAD union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; @@ -362,8 +362,6 @@ typedef struct { #endif }; blake2_impl impl; - bool use_mutex; - PyMutex mutex; } Blake2Object; #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 43d53442666fb7..d33f144d79f0a9 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -50,7 +50,8 @@ #include "pythread.h" -#define HASHLIB_LOCK_HEAD \ +#define PyObject_HASHLIB_HEAD \ + PyObject_HEAD \ /* Guard against race conditions during incremental update(). 
*/ \ PyMutex mutex; diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index cb53c1dcd7b42c..d3b22cb41ca119 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -283,11 +283,7 @@ get_hmacmodule_state_by_cls(PyTypeObject *cls) typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; typedef struct HMACObject { - PyObject_HEAD - - bool use_mutex; - PyMutex mutex; - + PyObject_HASHLIB_HEAD // Hash function information PyObject *name; // rendered name (exact unicode object) HMAC_Hash_Kind kind; // can be used for runtime dispatch (must be known) @@ -878,12 +874,11 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return NULL; } - int rc = 0; HASHLIB_ACQUIRE_LOCK(self); /* copy hash information */ hmac_copy_hinfo(copy, self); /* copy internal state */ - rc = hmac_copy_state(copy, self); + int rc = hmac_copy_state(copy, self); HASHLIB_RELEASE_LOCK(self); if (rc < 0) { diff --git a/Modules/md5module.c b/Modules/md5module.c index bfc269c80b9a5e..38b6bbe5ad687c 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -37,10 +37,8 @@ class MD5Type "MD5object *" "&PyType_Type" #include "_hacl/Hacl_Hash_MD5.h" - typedef struct { - PyObject_HEAD - HASHLIB_LOCK_HEAD + PyObject_HASHLIB_HEAD Hacl_Hash_MD5_state_t *hash_state; } MD5object; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index a95a9af5fe848c..c68caeb6a3ca93 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -38,11 +38,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" #include "_hacl/Hacl_Hash_SHA1.h" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. 
- bool use_mutex; - PyMutex mutex; - PyThread_type_lock lock; + PyObject_HASHLIB_HEAD Hacl_Hash_SHA1_state_t *hash_state; } SHA1object; diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 1595fb9eca56e4..cf2ca265c921b5 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -50,20 +50,14 @@ class SHA512Type "SHA512object *" "&PyType_Type" // TODO: Get rid of int digestsize in favor of Hacl state info? typedef struct { - PyObject_HEAD + PyObject_HASHLIB_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; Hacl_Hash_SHA2_state_t_256 *state; } SHA256object; typedef struct { - PyObject_HEAD + PyObject_HASHLIB_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; Hacl_Hash_SHA2_state_t_512 *state; } SHA512object; diff --git a/Modules/sha3module.c b/Modules/sha3module.c index b66af6dbcd1c5e..e12aaa9966a02f 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -59,10 +59,7 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" #include "_hacl/Hacl_Hash_SHA3.h" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. 
- bool use_mutex; - PyMutex mutex; + PyObject_HASHLIB_HEAD Hacl_Hash_SHA3_state_t *hash_state; } SHA3object; From db5727853bf6dea49a13f1e1b0135b940f519a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 12:33:23 +0200 Subject: [PATCH 17/41] do not guard against empty buffers for now --- Modules/_hashopenssl.c | 24 +++++++++++------------- Modules/blake2module.c | 20 ++++++++------------ Modules/hmacmodule.c | 25 ++++++++++--------------- Modules/md5module.c | 28 +++++++++++----------------- Modules/sha1module.c | 12 +++++------- Modules/sha2module.c | 32 +++++++++++++------------------- Modules/sha3module.c | 20 ++++++++------------ 7 files changed, 66 insertions(+), 95 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 4abcfe2219433e..1a8691035295bb 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -800,16 +800,16 @@ static PyObject * _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj) /*[clinic end generated code: output=62ad989754946b86 input=aa1ce20e3f92ceb6]*/ { - int rc; + int result; Py_buffer view; GET_BUFFER_VIEW_OR_ERROUT(obj, &view); Py_BEGIN_ALLOW_THREADS HASHLIB_ACQUIRE_LOCK(self); - rc = _hashlib_HASH_hash(self, view.buf, view.len); + result = _hashlib_HASH_hash(self, view.buf, view.len); HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS PyBuffer_Release(&view); - return rc < 0 ? NULL : Py_None; + return result < 0 ? 
NULL : Py_None; } static PyMethodDef HASH_methods[] = { @@ -1806,19 +1806,17 @@ _hashlib_hmac_digest_size(HMACobject *self) static int _hmac_update(HMACobject *self, PyObject *obj) { - int r = 1; + int r; Py_buffer view = {0}; GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0); - if (view.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + r = HMAC_Update(self->ctx, + (const unsigned char *)view.buf, + (size_t)view.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&view); if (r == 0) { diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 0d0fd3881d6687..f7b44625666b99 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -642,11 +642,9 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, if (data != NULL) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - blake2_update_state_unlocked(self, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + blake2_update_state_unlocked(self, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -819,13 +817,11 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - blake2_update_state_unlocked(self, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + blake2_update_state_unlocked(self, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); Py_RETURN_NONE; } diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index d3b22cb41ca119..f4b72fb832f052 100644 --- a/Modules/hmacmodule.c +++ 
b/Modules/hmacmodule.c @@ -739,15 +739,12 @@ hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) { assert(self->name != NULL); assert(self->state != NULL); - assert(len >= 0); int rc = 0; - if (len > 0) { - /* Do not use self->mutex here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - rc = _hacl_hmac_state_update(self->state, msg, len); - Py_END_ALLOW_THREADS - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + rc = _hacl_hmac_state_update(self->state, msg, len); + Py_END_ALLOW_THREADS return rc; } @@ -906,13 +903,11 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) int rc = 0; Py_buffer msg; GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg); - if (msg.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&msg); return rc < 0 ? 
NULL : Py_None; } diff --git a/Modules/md5module.c b/Modules/md5module.c index 38b6bbe5ad687c..7f94f6bf6ad9f1 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -123,14 +123,6 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) return (PyObject *)newobj; } -static void -md5_digest_compute_with_lock(MD5object *self, uint8_t *digest) -{ - HASHLIB_ACQUIRE_LOCK(self); - Hacl_Hash_MD5_digest(self->hash_state, digest); - HASHLIB_RELEASE_LOCK(self); -} - /*[clinic input] MD5Type.digest @@ -142,7 +134,9 @@ MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { uint8_t digest[MD5_DIGESTSIZE]; - md5_digest_compute_with_lock(self, digest); + HASHLIB_ACQUIRE_LOCK(self); + Hacl_Hash_MD5_digest(self->hash_state, digest); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -157,7 +151,9 @@ MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { uint8_t digest[MD5_DIGESTSIZE]; - md5_digest_compute_with_lock(self, digest); + HASHLIB_ACQUIRE_LOCK(self); + Hacl_Hash_MD5_digest(self->hash_state, digest); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); } @@ -197,13 +193,11 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_md5_state_update(self->hash_state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + _hacl_md5_state_update(self->hash_state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); Py_RETURN_NONE; } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index c68caeb6a3ca93..806dbab59e9337 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -196,13 +196,11 
@@ SHA1Type_update_impl(SHA1object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha1_state_update(self->hash_state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + _hacl_sha1_state_update(self->hash_state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); Py_RETURN_NONE; } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index cf2ca265c921b5..d8530284d258e7 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -406,13 +406,11 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha2_state_update_256(self->state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + _hacl_sha2_state_update_256(self->state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -432,13 +430,11 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha2_state_update_512(self->state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + _hacl_sha2_state_update_512(self->state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -793,11 +789,9 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - _hacl_sha2_state_update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + _hacl_sha2_state_update_512(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index e12aaa9966a02f..c83caa920c4bd8 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -162,11 +162,9 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - sha3_update(self->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + sha3_update(self->hash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS } PyBuffer_Release(&buf); @@ -298,13 +296,11 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - if (buf.len > 0) { - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - sha3_update(self->hash_state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + HASHLIB_ACQUIRE_LOCK(self); + sha3_update(self->hash_state, buf.buf, buf.len); + HASHLIB_RELEASE_LOCK(self); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); Py_RETURN_NONE; } From ead20a1b6fa840fc0f2eef216dc3cd17e06adeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 12:38:09 +0200 Subject: [PATCH 18/41] consistency fixes --- Modules/blake2module.c | 8 +++++--- Modules/sha3module.c | 7 ++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index f7b44625666b99..3ad68a8de2f080 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ 
-418,7 +418,7 @@ new_Blake2Object(PyTypeObject *type) } while (0) static void -blake2_update_state_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) +blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { // blake2b_256_state and blake2s_128_state must be if'd since @@ -642,8 +642,10 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, if (data != NULL) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ Py_BEGIN_ALLOW_THREADS - blake2_update_state_unlocked(self, buf.buf, buf.len); + blake2_update_unlocked(self, buf.buf, buf.len); Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } @@ -819,7 +821,7 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) GET_BUFFER_VIEW_OR_ERROUT(data, &buf); Py_BEGIN_ALLOW_THREADS HASHLIB_ACQUIRE_LOCK(self); - blake2_update_state_unlocked(self, buf.buf, buf.len); + blake2_update_unlocked(self, buf.buf, buf.len); HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS PyBuffer_Release(&buf); diff --git a/Modules/sha3module.c b/Modules/sha3module.c index c83caa920c4bd8..09b280eca43985 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -81,7 +81,8 @@ newSHA3object(PyTypeObject *type) } static void -sha3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *buf, Py_ssize_t len) +_hacl_sha3_state_update(Hacl_Hash_SHA3_state_t *state, + uint8_t *buf, Py_ssize_t len) { /* * Note: we explicitly ignore the error code on the basis that it would @@ -163,7 +164,7 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ Py_BEGIN_ALLOW_THREADS - sha3_update(self->hash_state, buf.buf, buf.len); + _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len); Py_END_ALLOW_THREADS } @@ -298,7 +299,7 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) GET_BUFFER_VIEW_OR_ERROUT(data, &buf); Py_BEGIN_ALLOW_THREADS HASHLIB_ACQUIRE_LOCK(self); - sha3_update(self->hash_state, buf.buf, buf.len); + _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len); HASHLIB_RELEASE_LOCK(self); Py_END_ALLOW_THREADS PyBuffer_Release(&buf); From 68a6bbcc11c85b0beb78a239123ff9052201ec50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 12:41:26 +0200 Subject: [PATCH 19/41] remove unused import --- Lib/test/support/hashlib_helper.py | 1 - Lib/test/test_hashlib.py | 1 - 2 files changed, 2 deletions(-) diff --git a/Lib/test/support/hashlib_helper.py b/Lib/test/support/hashlib_helper.py index c0d5da042d8bcf..da318a307e8f42 100644 --- a/Lib/test/support/hashlib_helper.py +++ b/Lib/test/support/hashlib_helper.py @@ -1,6 +1,5 @@ import functools import hashlib -import importlib import unittest from test.support.import_helper import import_module diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index a9469504b504ed..44ebb884109efd 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -20,7 +20,6 @@ import unittest from test import support from test.support import _4G, bigmemtest -from test.support import hashlib_helper from test.support.import_helper import import_fresh_module from test.support import requires_resource from test.support import threading_helper From 68f297e0c3a82581788c33c5a1ab53ed068d49b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 13:11:07 +0200 Subject: [PATCH 20/41] correct naming for locked/unlocked versions --- Modules/blake2module.c | 4 ++-- Modules/hmacmodule.c | 6 
+++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 3ad68a8de2f080..07920d5d3da53d 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -737,7 +737,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, } static int -blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) +blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); #define BLAKE2_COPY(TYPE, STATE_ATTR) \ @@ -795,7 +795,7 @@ _blake2_blake2b_copy_impl(Blake2Object *self) } HASHLIB_ACQUIRE_LOCK(self); - rc = blake2_blake2b_copy_locked(self, cpy); + rc = blake2_blake2b_copy_unlocked(self, cpy); HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(cpy); diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f4b72fb832f052..59de3c1525778c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -923,7 +923,7 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) * Note: this function may raise a MemoryError. 
*/ static int -hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) +hmac_digest_compute_locked(HMACObject *self, uint8_t *digest) { assert(digest != NULL); hacl_errno_t rc; @@ -951,7 +951,7 @@ _hmac_HMAC_digest_impl(HMACObject *self) { assert(self->digest_size <= Py_hmac_hash_max_digest_size); uint8_t digest[Py_hmac_hash_max_digest_size]; - if (hmac_digest_compute_cond_lock(self, digest) < 0) { + if (hmac_digest_compute_locked(self, digest) < 0) { return NULL; } return PyBytes_FromStringAndSize((const char *)digest, self->digest_size); @@ -974,7 +974,7 @@ _hmac_HMAC_hexdigest_impl(HMACObject *self) { assert(self->digest_size <= Py_hmac_hash_max_digest_size); uint8_t digest[Py_hmac_hash_max_digest_size]; - if (hmac_digest_compute_cond_lock(self, digest) < 0) { + if (hmac_digest_compute_locked(self, digest) < 0) { return NULL; } return _Py_strhex((const char *)digest, self->digest_size); From 9817c3dc0123693d722b8c157ab9ea5b582296cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 13:13:25 +0200 Subject: [PATCH 21/41] debug? 
--- Modules/hmacmodule.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 59de3c1525778c..ae5063b53f5c8b 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -491,7 +491,10 @@ _hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) len -= UINT32_MAX; } #endif - assert(len <= UINT32_MAX_AS_SSIZE_T); + if (len > UINT32_MAX_AS_SSIZE_T) { + PyErr_Format(PyExc_ValueError, "invalid length: %zd (max: %ju)", len, UINT32_MAX); + return -1; + } return _hacl_hmac_state_update_once(state, buf, len); } From c14c87d2e7fe20619ee724654f2f9336e6cbc511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 13:30:52 +0200 Subject: [PATCH 22/41] simplify HMAC --- Modules/hmacmodule.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index ae5063b53f5c8b..99ed022abce940 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -728,29 +728,6 @@ hmac_new_initial_state(HMACObject *self, uint8_t *key, Py_ssize_t len) return self->state == NULL ? -1 : 0; } -/* - * Feed initial data. - * - * This function MUST only be called by the HMAC object constructor - * and after hmac_set_hinfo() and hmac_new_initial_state() have been - * called, lest the behaviour is undefined. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) -{ - assert(self->name != NULL); - assert(self->state != NULL); - int rc = 0; - /* Do not use self->mutex here as this is the constructor - * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - rc = _hacl_hmac_state_update(self->state, msg, len); - Py_END_ALLOW_THREADS - return rc; -} - /*[clinic input] _hmac.new @@ -797,7 +774,11 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, if (msgobj != NULL && msgobj != Py_None) { Py_buffer msg; GET_BUFFER_VIEW_OR_ERROR(msgobj, &msg, goto error); - rc = hmac_feed_initial_data(self, msg.buf, msg.len); + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&msg); #ifndef NDEBUG if (rc < 0) { From bfb543622d0cfd9c820183815bd43059fbb642d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 13:48:53 +0200 Subject: [PATCH 23/41] release the GIL for large buffers --- Modules/blake2module.c | 16 +++++++-------- Modules/hashlib.h | 23 +++++++++++++++++++++ Modules/hmacmodule.c | 16 +++++++-------- Modules/md5module.c | 16 +++++++-------- Modules/sha1module.c | 16 +++++++-------- Modules/sha2module.c | 46 ++++++++++++++++++++++-------------------- Modules/sha3module.c | 16 +++++++-------- 7 files changed, 87 insertions(+), 62 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 07920d5d3da53d..0f071958d2db23 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -644,9 +644,10 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - blake2_update_unlocked(self, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + blake2_update_unlocked(self, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } @@ -819,11 +820,10 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - blake2_update_unlocked(self, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, buf.len, + blake2_update_unlocked(self, buf.buf, buf.len) + ) PyBuffer_Release(&buf); Py_RETURN_NONE; } diff --git a/Modules/hashlib.h b/Modules/hashlib.h index d33f144d79f0a9..35f2ee1a607a79 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -63,6 +63,29 @@ (OBJ)->mutex = (PyMutex){0}; \ } while (0) +#define HASHLIB_GIL_MINSIZE 2048 +#define HASHLIB_EXTERNAL_INSTRUCTIONS(SIZE, STATEMENTS) \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + STATEMENTS; \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + STATEMENTS; \ + } + +#define HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX(OBJ, SIZE, STATEMENTS) \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + STATEMENTS; \ + } + static inline int _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) { diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 99ed022abce940..a98b0ead82867c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -776,9 +776,10 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, GET_BUFFER_VIEW_OR_ERROR(msgobj, &msg, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + msg.len, + _hacl_hmac_state_update(self->state, msg.buf, msg.len) + ); PyBuffer_Release(&msg); #ifndef NDEBUG if (rc < 0) { @@ -887,11 +888,10 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) int rc = 0; Py_buffer msg; GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, msg.len, + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) + ) PyBuffer_Release(&msg); return rc < 0 ? NULL : Py_None; } diff --git a/Modules/md5module.c b/Modules/md5module.c index 7f94f6bf6ad9f1..a5c0dc350fb5ea 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -193,11 +193,10 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_md5_state_update(self->hash_state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, buf.len, + _hacl_md5_state_update(self->hash_state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -301,9 +300,10 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - _hacl_md5_state_update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_md5_state_update(new->hash_state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index 806dbab59e9337..bdeae1e36f0f7a 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -196,11 +196,10 @@ SHA1Type_update_impl(SHA1object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha1_state_update(self->hash_state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, buf.len, + _hacl_sha1_state_update(self->hash_state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -303,9 +302,10 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - _hacl_sha1_state_update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_sha1_state_update(new->hash_state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index d8530284d258e7..12026888bc6537 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -406,11 +406,10 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha2_state_update_256(self->state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, buf.len, + _hacl_sha2_state_update_256(self->state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -430,11 +429,10 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha2_state_update_512(self->state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, buf.len, + _hacl_sha2_state_update_512(self->state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -616,9 +614,10 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - _hacl_sha2_state_update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_sha2_state_update_256(new->state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } @@ -673,9 +672,10 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - _hacl_sha2_state_update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_sha2_state_update_256(new->state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } @@ -731,9 +731,10 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - _hacl_sha2_state_update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_sha2_state_update_512(new->state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } @@ -789,9 +790,10 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - Py_BEGIN_ALLOW_THREADS - _hacl_sha2_state_update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_sha2_state_update_512(new->state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 09b280eca43985..3d65ff472f573d 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -163,9 +163,10 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS( + buf.len, + _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len) + ) } PyBuffer_Release(&buf); @@ -297,11 +298,10 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + self, buf.len, + _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len) + ) PyBuffer_Release(&buf); Py_RETURN_NONE; } From 923c05f772eb752f8babc6464bb3c7d29dff1a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 13:53:25 +0200 Subject: [PATCH 24/41] restore GIL_MINSIZE --- Lib/test/support/hashlib_helper.py | 20 ++++++++++++++++ Lib/test/test_hashlib.py | 37 +++++++++++++++++++++++++++++- Lib/test/test_hmac.py | 24 +++++++++++++++++++ Modules/_hashopenssl.c | 12 ++++++++++ Modules/blake2module.c | 2 ++ Modules/hmacmodule.c | 17 ++++++++++++++ Modules/md5module.c | 3 +++ Modules/sha1module.c | 6 +++++ Modules/sha2module.c | 7 
++++++ Modules/sha3module.c | 3 +++ 10 files changed, 130 insertions(+), 1 deletion(-) diff --git a/Lib/test/support/hashlib_helper.py b/Lib/test/support/hashlib_helper.py index da318a307e8f42..7032257b06877a 100644 --- a/Lib/test/support/hashlib_helper.py +++ b/Lib/test/support/hashlib_helper.py @@ -1,5 +1,6 @@ import functools import hashlib +import importlib import unittest from test.support.import_helper import import_module @@ -307,3 +308,22 @@ def sha3_384(self): @property def sha3_512(self): return self._find_constructor_in("_sha3","sha3_512") + + +def find_gil_minsize(modules_names, default=2048): + """Get the largest GIL_MINSIZE value for the given cryptographic modules. + + The valid module names are the following: + + - _hashlib + - _md5, _sha1, _sha2, _sha3, _blake2 + - _hmac + """ + sizes = [] + for module_name in modules_names: + try: + module = importlib.import_module(module_name) + except ImportError: + continue + sizes.append(getattr(module, '_GIL_MINSIZE', default)) + return max(sizes, default=default) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 44ebb884109efd..b83ae181718b7a 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -20,6 +20,7 @@ import unittest from test import support from test.support import _4G, bigmemtest +from test.support import hashlib_helper from test.support.import_helper import import_fresh_module from test.support import requires_resource from test.support import threading_helper @@ -408,7 +409,7 @@ def test_large_update(self): aas = b'a' * 128 bees = b'b' * 127 cees = b'c' * 126 - dees = b'd' * 2048 + dees = b'd' * 2048 # HASHLIB_GIL_MINSIZE for cons in self.hash_constructors: m1 = cons(usedforsecurity=False) @@ -989,6 +990,40 @@ def test_case_shake256_vector(self): for msg, md in read_vectors('shake_256'): self.check('shake_256', msg, md, True) + def test_gil(self): + # Check things work fine with an input larger than the size required + # for multithreaded operation. 
Currently, all cryptographic modules + # have the same constant value (2048) but in the future it might not + # be the case. + mods = ['_md5', '_sha1', '_sha2', '_sha3', '_blake2', '_hashlib'] + gil_minsize = hashlib_helper.find_gil_minsize(mods) + for cons in self.hash_constructors: + # constructors belong to one of the above modules + m = cons(usedforsecurity=False) + m.update(b'1') + m.update(b'#' * gil_minsize) + m.update(b'1') + + m = cons(b'x' * gil_minsize, usedforsecurity=False) + m.update(b'1') + + def test_sha256_gil(self): + gil_minsize = hashlib_helper.find_gil_minsize(['_sha2', '_hashlib']) + m = hashlib.sha256() + m.update(b'1') + m.update(b'#' * gil_minsize) + m.update(b'1') + self.assertEqual( + m.hexdigest(), + '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' + ) + + m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1') + self.assertEqual( + m.hexdigest(), + '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' + ) + @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_threaded_hashing(self): diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py index d1f4662adbb618..ff6e1bce0ef801 100644 --- a/Lib/test/test_hmac.py +++ b/Lib/test/test_hmac.py @@ -1133,6 +1133,11 @@ def HMAC(self, key, msg=None): """Create a HMAC object.""" raise NotImplementedError + @property + def gil_minsize(self): + """Get the maximal input length for the GIL to be held.""" + raise NotImplementedError + def check_update(self, key, chunks): chunks = list(chunks) msg = b''.join(chunks) @@ -1150,6 +1155,13 @@ def test_update(self): with self.subTest(key=key, msg=msg): self.check_update(key, [msg]) + def test_update_large(self): + gil_minsize = self.gil_minsize + key = random.randbytes(16) + top = random.randbytes(gil_minsize + 1) + bot = random.randbytes(gil_minsize + 1) + self.check_update(key, [top, bot]) + def test_update_exceptions(self): h = self.HMAC(b"key") for msg in ['invalid msg', 123, (), []]: @@ 
-1163,6 +1175,10 @@ class PyUpdateTestCase(PyModuleMixin, UpdateTestCaseMixin, unittest.TestCase): def HMAC(self, key, msg=None): return self.hmac.HMAC(key, msg, digestmod='sha256') + @property + def gil_minsize(self): + return sha2._GIL_MINSIZE + @hashlib_helper.requires_openssl_hashdigest('sha256') class OpenSSLUpdateTestCase(UpdateTestCaseMixin, unittest.TestCase): @@ -1170,6 +1186,10 @@ class OpenSSLUpdateTestCase(UpdateTestCaseMixin, unittest.TestCase): def HMAC(self, key, msg=None): return _hashlib.hmac_new(key, msg, digestmod='sha256') + @property + def gil_minsize(self): + return _hashlib._GIL_MINSIZE + class BuiltinUpdateTestCase(BuiltinModuleMixin, UpdateTestCaseMixin, unittest.TestCase): @@ -1179,6 +1199,10 @@ def HMAC(self, key, msg=None): # are still built, making it possible to use SHA-2 hashes. return self.hmac.new(key, msg, digestmod='sha256') + @property + def gil_minsize(self): + return self.hmac._GIL_MINSIZE + class CopyBaseTestCase: diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 1a8691035295bb..08248af71da16d 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -2458,6 +2458,17 @@ hashlib_exception(PyObject *module) return 0; } +static int +hashlib_constants(PyObject *module) +{ + if (PyModule_AddIntConstant(module, "_GIL_MINSIZE", + HASHLIB_GIL_MINSIZE) < 0) + { + return -1; + } + return 0; +} + static PyModuleDef_Slot hashlib_slots[] = { {Py_mod_exec, hashlib_init_hashtable}, {Py_mod_exec, hashlib_init_HASH_type}, @@ -2466,6 +2477,7 @@ static PyModuleDef_Slot hashlib_slots[] = { {Py_mod_exec, hashlib_md_meth_names}, {Py_mod_exec, hashlib_init_constructors}, {Py_mod_exec, hashlib_exception}, + {Py_mod_exec, hashlib_constants}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 0f071958d2db23..ecff6467f7b37f 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -227,6 
+227,8 @@ blake2_exec(PyObject *m) } \ } while (0) + ADD_INT_CONST("_GIL_MINSIZE", HASHLIB_GIL_MINSIZE); + st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &blake2b_type_spec, NULL); diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index a98b0ead82867c..df53c4ef0d69a6 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -1534,6 +1534,20 @@ hmacmodule_init_strings(hmacmodule_state *state) return 0; } +static int +hmacmodule_init_globals(PyObject *module, hmacmodule_state *state) +{ +#define ADD_INT_CONST(NAME, VALUE) \ + do { \ + if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ + return -1; \ + } \ + } while (0) + ADD_INT_CONST("_GIL_MINSIZE", HASHLIB_GIL_MINSIZE); +#undef ADD_INT_CONST + return 0; +} + static void hmacmodule_init_cpu_features(hmacmodule_state *state) { @@ -1624,6 +1638,9 @@ hmacmodule_exec(PyObject *module) if (hmacmodule_init_strings(state) < 0) { return -1; } + if (hmacmodule_init_globals(module, state) < 0) { + return -1; + } hmacmodule_init_cpu_features(state); return 0; } diff --git a/Modules/md5module.c b/Modules/md5module.c index a5c0dc350fb5ea..94a070ba4ba5c6 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -352,6 +352,9 @@ md5_exec(PyObject *m) if (PyModule_AddObjectRef(m, "MD5Type", (PyObject *)st->md5_type) < 0) { return -1; } + if (PyModule_AddIntConstant(m, "_GIL_MINSIZE", HASHLIB_GIL_MINSIZE) < 0) { + return -1; + } return 0; } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index bdeae1e36f0f7a..fd940aa5c6988a 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -355,6 +355,12 @@ _sha1_exec(PyObject *module) { return -1; } + if (PyModule_AddIntConstant(module, + "_GIL_MINSIZE", + HASHLIB_GIL_MINSIZE) < 0) + { + return -1; + } return 0; } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 12026888bc6537..7eb397858495a6 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -877,6 +877,13 @@ static int sha2_exec(PyObject *module) return 
-1; } + if (PyModule_AddIntConstant(module, + "_GIL_MINSIZE", + HASHLIB_GIL_MINSIZE) < 0) + { + return -1; + } + return 0; } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 3d65ff472f573d..26e6692e0ad5fd 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -630,6 +630,9 @@ _sha3_exec(PyObject *m) if (PyModule_AddStringConstant(m, "implementation", "HACL") < 0) { return -1; } + if (PyModule_AddIntConstant(m, "_GIL_MINSIZE", HASHLIB_GIL_MINSIZE) < 0) { + return -1; + } return 0; } From 55b2afabcd516207125c4774111a5c4288b5c890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 14:17:21 +0200 Subject: [PATCH 25/41] correctly lock objects --- Lib/test/test_hashlib.py | 10 +++++++-- Modules/_hashopenssl.c | 39 ++++++++++++++-------------------- Modules/blake2module.c | 8 +++---- Modules/hashlib.h | 46 +++++++++++++++++++++++----------------- Modules/hmacmodule.c | 6 +++--- Modules/md5module.c | 8 +++---- Modules/sha1module.c | 8 +++---- Modules/sha2module.c | 24 ++++++++++----------- Modules/sha3module.c | 8 +++---- 9 files changed, 81 insertions(+), 76 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index b83ae181718b7a..ca9c561ea803f3 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1027,17 +1027,23 @@ def test_sha256_gil(self): @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_threaded_hashing(self): + for constructor in self.hash_constructors: + if constructor().name not in self.shakes: + with self.subTest(constructor=constructor): + self.do_test_threaded_hashing(constructor) + + def do_test_threaded_hashing(self, constructor): # Updating the same hash object from several threads at once # using data chunk sizes containing the same byte sequences. 
# # If the internal locks are working to prevent multiple # updates on the same object from running at once, the resulting # hash will be the same as doing it single threaded upfront. - hasher = hashlib.sha1() + hasher = constructor() num_threads = 5 smallest_data = b'swineflu' data = smallest_data * 200000 - expected_hash = hashlib.sha1(data*num_threads).hexdigest() + expected_hash = constructor(data*num_threads).hexdigest() def hash_in_chunks(chunk_size): index = 0 diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 08248af71da16d..3f454578411d26 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -278,21 +278,15 @@ get_hashlib_state(PyObject *module) } typedef struct { - PyObject_HEAD + PyObject_HASHLIB_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; /* OpenSSL context lock */ } HASHobject; #define HASHobject_CAST(op) ((HASHobject *)(op)) typedef struct { - PyObject_HEAD + PyObject_HASHLIB_HEAD HMAC_CTX *ctx; /* OpenSSL hmac context */ - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; /* HMAC context lock */ } HMACobject; #define HMACobject_CAST(op) ((HMACobject *)(op)) @@ -803,11 +797,10 @@ _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj) int result; Py_buffer view; GET_BUFFER_VIEW_OR_ERROUT(obj, &view); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - result = _hashlib_HASH_hash(self, view.buf, view.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, view.len, + result = _hashlib_HASH_hash(self, view.buf, view.len) + ); PyBuffer_Release(&view); return result < 0 ?
NULL : Py_None; } @@ -1114,9 +1107,10 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, if (view.buf && view.len) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - result = _hashlib_HASH_hash(self, view.buf, view.len); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + view.len, + result = _hashlib_HASH_hash(self, view.buf, view.len) + ); if (result == -1) { assert(PyErr_Occurred()); Py_CLEAR(self); @@ -1810,13 +1804,12 @@ _hmac_update(HMACobject *self, PyObject *obj) Py_buffer view = {0}; GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0); - Py_BEGIN_ALLOW_THREADS - HASHLIB_ACQUIRE_LOCK(self); - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); - HASHLIB_RELEASE_LOCK(self); - Py_END_ALLOW_THREADS + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, view.len, + r = HMAC_Update( + self->ctx, (const unsigned char *)view.buf, (size_t)view.len + ) + ); PyBuffer_Release(&view); if (r == 0) { diff --git a/Modules/blake2module.c b/Modules/blake2module.c index ecff6467f7b37f..199b01e5b5c1ca 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -646,10 +646,10 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, blake2_update_unlocked(self, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } @@ -822,10 +822,10 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, buf.len, blake2_update_unlocked(self, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 35f2ee1a607a79..bb7e50ed6fc794 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -64,27 +64,33 @@ } while (0) #define HASHLIB_GIL_MINSIZE 2048 -#define HASHLIB_EXTERNAL_INSTRUCTIONS(SIZE, STATEMENTS) \ - if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ - Py_BEGIN_ALLOW_THREADS \ - STATEMENTS; \ - Py_END_ALLOW_THREADS \ - } \ - else { \ - STATEMENTS; \ - } +#define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS) \ + do { \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + STATEMENTS; \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + STATEMENTS; \ + } \ + } while (0) -#define HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX(OBJ, SIZE, STATEMENTS) \ - if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ - Py_BEGIN_ALLOW_THREADS \ - HASHLIB_ACQUIRE_LOCK(OBJ); \ - STATEMENTS; \ - HASHLIB_RELEASE_LOCK(OBJ); \ - Py_END_ALLOW_THREADS \ - } \ - else { \ - STATEMENTS; \ - } +#define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \ + do { \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + } \ + } while (0) static inline int _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index df53c4ef0d69a6..18673b98e9c88d 100644 
--- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -776,7 +776,7 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, GET_BUFFER_VIEW_OR_ERROR(msgobj, &msg, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( msg.len, _hacl_hmac_state_update(self->state, msg.buf, msg.len) ); @@ -888,10 +888,10 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) int rc = 0; Py_buffer msg; GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, msg.len, rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) - ) + ); PyBuffer_Release(&msg); return rc < 0 ? NULL : Py_None; } diff --git a/Modules/md5module.c b/Modules/md5module.c index 94a070ba4ba5c6..e3398541189411 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -193,10 +193,10 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, buf.len, _hacl_md5_state_update(self->hash_state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -300,10 +300,10 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_md5_state_update(new->hash_state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index fd940aa5c6988a..cb7bcd5a078326 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -196,10 +196,10 @@ SHA1Type_update_impl(SHA1object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, buf.len, _hacl_sha1_state_update(self->hash_state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -302,10 +302,10 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_sha1_state_update(new->hash_state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 7eb397858495a6..f4c4ba3254849b 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -406,10 +406,10 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, buf.len, _hacl_sha2_state_update_256(self->state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -429,10 +429,10 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, buf.len, _hacl_sha2_state_update_512(self->state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -614,10 +614,10 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, 
int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_sha2_state_update_256(new->state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } @@ -672,10 +672,10 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_sha2_state_update_256(new->state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } @@ -731,10 +731,10 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_sha2_state_update_512(new->state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } @@ -790,10 +790,10 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, if (string) { /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. */ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_sha2_state_update_512(new->state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 26e6692e0ad5fd..f9983fe509b36c 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -163,10 +163,10 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); /* Do not use self->mutex here as this is the constructor * where it is not yet possible to have concurrent access. 
*/ - HASHLIB_EXTERNAL_INSTRUCTIONS( + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( buf.len, _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len) - ) + ); } PyBuffer_Release(&buf); @@ -298,10 +298,10 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - HASHLIB_EXTERNAL_INSTRUCTIONS_WITH_MUTEX( + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( self, buf.len, _hacl_sha3_state_update(self->hash_state, buf.buf, buf.len) - ) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } From 5cd60d1556b5632359ba43d8b6871690db94ceb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 14:27:22 +0200 Subject: [PATCH 26/41] improve tests --- Lib/test/test_hashlib.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index ca9c561ea803f3..1955d0b2a9e9c5 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1027,28 +1027,30 @@ def test_sha256_gil(self): @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_threaded_hashing(self): - for constructor in self.hash_constructors: - if constructor().name not in self.shakes: - with self.subTest(constructor=constructor): - self.do_test_threaded_hashing(constructor) + for algorithm, constructors in self.constructors_to_test.items(): + is_shake = algorithm in self.shakes + for constructor in constructors: + with self.subTest(constructor=constructor, is_shake=is_shake): + self.do_test_threaded_hashing(constructor, is_shake) - def do_test_threaded_hashing(self, constructor): + def do_test_threaded_hashing(self, constructor, is_shake): # Updating the same hash object from several threads at once # using data chunk sizes containing the same byte sequences. 
# # If the internal locks are working to prevent multiple # updates on the same object from running at once, the resulting # hash will be the same as doing it single threaded upfront. - hasher = constructor() num_threads = 5 - smallest_data = b'swineflu' + smallest_data = os.urandom(16) data = smallest_data * 200000 - expected_hash = constructor(data*num_threads).hexdigest() + + h1 = constructor() + h2 = constructor(data * num_threads) def hash_in_chunks(chunk_size): index = 0 while index < len(data): - hasher.update(data[index:index + chunk_size]) + h1.update(data[index:index + chunk_size]) index += chunk_size threads = [] @@ -1065,7 +1067,10 @@ def hash_in_chunks(chunk_size): for thread in threads: thread.join() - self.assertEqual(expected_hash, hasher.hexdigest()) + if is_shake: + self.assertEqual(h1.hexdigest(16), h2.hexdigest(16)) + else: + self.assertEqual(h1.hexdigest(), h2.hexdigest()) def test_get_fips_mode(self): fips_mode = self.is_fips_mode From a2fcbd5f8a3e354a2b5d26d2c802b11fafdb3c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 14:29:02 +0200 Subject: [PATCH 27/41] fixup HMAC --- Modules/hmacmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 18673b98e9c88d..a80aa1cb5b4103 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -778,7 +778,7 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, * where it is not yet possible to have concurrent access. 
*/ HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( msg.len, - _hacl_hmac_state_update(self->state, msg.buf, msg.len) + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) ); PyBuffer_Release(&msg); #ifndef NDEBUG From 417cee1587b79462fd39842ed1f2a7f1f5d0dc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 14:49:57 +0200 Subject: [PATCH 28/41] fixup --- Modules/hmacmodule.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index a80aa1cb5b4103..116374adc773f1 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -390,33 +390,39 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) static int _hacl_convert_errno(hacl_errno_t code) { + int res = -1; + PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { case Hacl_Streaming_Types_Success: { - return 0; + res = 0; + goto finally; } case Hacl_Streaming_Types_InvalidAlgorithm: { - PyErr_Format(PyExc_ValueError, "invalid HACL* algorithm"); - return -1; + PyErr_SetString(PyExc_ValueError, "invalid HACL* algorithm"); + goto finally; } case Hacl_Streaming_Types_InvalidLength: { PyErr_SetString(PyExc_ValueError, "invalid length"); - return -1; + goto finally; } case Hacl_Streaming_Types_MaximumLengthExceeded: { PyErr_SetString(PyExc_OverflowError, "maximum length exceeded"); - return -1; + goto finally; } case Hacl_Streaming_Types_OutOfMemory: { PyErr_NoMemory(); - return -1; + goto finally; } default: { PyErr_Format(PyExc_RuntimeError, "HACL* internal routine failed with error code: %d", code); - return -1; + goto finally; } } +finally: + PyGILState_Release(gstate); + return res; } /* @@ -483,7 +489,7 @@ _hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) assert(len >= 0); #ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 while (len > UINT32_MAX_AS_SSIZE_T) { - if 
(_hacl_hmac_state_update_once(state, buf, UINT32_MAX)) { + if (_hacl_hmac_state_update_once(state, buf, UINT32_MAX) < 0) { assert(PyErr_Occurred()); return -1; } @@ -492,7 +498,9 @@ _hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) } #endif if (len > UINT32_MAX_AS_SSIZE_T) { + PyGILState_STATE gstate = PyGILState_Ensure(); PyErr_Format(PyExc_ValueError, "invalid length: %zd (max: %ju)", len, UINT32_MAX); + PyGILState_Release(gstate); return -1; } return _hacl_hmac_state_update_once(state, buf, len); @@ -781,13 +789,9 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) ); PyBuffer_Release(&msg); -#ifndef NDEBUG if (rc < 0) { goto error; } -#else - (void)rc; -#endif } assert(rc == 0); PyObject_GC_Track(self); From f350501c4b1264d2e63e9448026c9964cfdfaae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:09:58 +0200 Subject: [PATCH 29/41] GIL protection --- Modules/hmacmodule.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 116374adc773f1..7c8b44c84230a3 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -390,39 +390,36 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) static int _hacl_convert_errno(hacl_errno_t code) { - int res = -1; + if (code == Hacl_Streaming_Types_Success) { + return 0; + } PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { - case Hacl_Streaming_Types_Success: { - res = 0; - goto finally; - } case Hacl_Streaming_Types_InvalidAlgorithm: { PyErr_SetString(PyExc_ValueError, "invalid HACL* algorithm"); - goto finally; + break; } case Hacl_Streaming_Types_InvalidLength: { PyErr_SetString(PyExc_ValueError, "invalid length"); - goto finally; + break; } case Hacl_Streaming_Types_MaximumLengthExceeded: { 
PyErr_SetString(PyExc_OverflowError, "maximum length exceeded"); - goto finally; + break; } case Hacl_Streaming_Types_OutOfMemory: { PyErr_NoMemory(); - goto finally; + break; } default: { PyErr_Format(PyExc_RuntimeError, "HACL* internal routine failed with error code: %d", code); - goto finally; + break; } } -finally: PyGILState_Release(gstate); - return res; + return -1; } /* From 5c4009dacef9473e8aa783dba50c2bc771c5d231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:10:08 +0200 Subject: [PATCH 30/41] show WASI errors --- Modules/hmacmodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 7c8b44c84230a3..7cb377e892b1ee 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -496,7 +496,8 @@ _hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) #endif if (len > UINT32_MAX_AS_SSIZE_T) { PyGILState_STATE gstate = PyGILState_Ensure(); - PyErr_Format(PyExc_ValueError, "invalid length: %zd (max: %ju)", len, UINT32_MAX); + PyErr_Format(PyExc_ValueError, "invalid length: %zd (max: %u)", + len, UINT32_MAX); PyGILState_Release(gstate); return -1; } From 8aec797072562522f7dc3b618d076a941bc02c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:22:28 +0200 Subject: [PATCH 31/41] fix WASI --- Modules/hmacmodule.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 7cb377e892b1ee..6d02869ac77c1e 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -494,14 +494,8 @@ _hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) len -= UINT32_MAX; } #endif - if (len > UINT32_MAX_AS_SSIZE_T) { - PyGILState_STATE gstate = PyGILState_Ensure(); - PyErr_Format(PyExc_ValueError, "invalid length: %zd 
(max: %u)", - len, UINT32_MAX); - PyGILState_Release(gstate); - return -1; - } - return _hacl_hmac_state_update_once(state, buf, len); + assert(Py_CHECK_HACL_UINT32_T_LENGTH(len)); + return _hacl_hmac_state_update_once(state, buf, (uint32_t)len); } /* Static information used to construct the hash table. */ @@ -787,9 +781,13 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) ); PyBuffer_Release(&msg); +#ifndef NDEBUG if (rc < 0) { goto error; } +#else + (void)rc; +#endif } assert(rc == 0); PyObject_GC_Track(self); From 6db58dc3b9e0593fca76586b5e54a03d29d4f659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:24:21 +0200 Subject: [PATCH 32/41] fix compilation --- Modules/hmacmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 6d02869ac77c1e..c99939961de5fe 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -494,7 +494,7 @@ _hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) len -= UINT32_MAX; } #endif - assert(Py_CHECK_HACL_UINT32_T_LENGTH(len)); + Py_CHECK_HACL_UINT32_T_LENGTH(len); return _hacl_hmac_state_update_once(state, buf, (uint32_t)len); } From b1f94635c918dd44676ac4ee0003bef65f3e8310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:24:44 +0200 Subject: [PATCH 33/41] fix compilation --- Modules/hmacmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index c99939961de5fe..268b6747141f55 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -463,8 +463,8 @@ _hacl_hmac_state_update_once(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) { assert(len >= 0); -#ifndef NDEBUG Py_CHECK_HACL_UINT32_T_LENGTH(len); +#ifndef 
NDEBUG hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, (uint32_t)len); return _hacl_convert_errno(code); #else From 491b9221275e069945bbcf778e9321853704ed72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:27:06 +0200 Subject: [PATCH 34/41] fix warnings --- Modules/hmacmodule.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 268b6747141f55..f28e14892b2b00 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -460,15 +460,13 @@ _hacl_hmac_state_free(HACL_HMAC_state *state) */ static int _hacl_hmac_state_update_once(HACL_HMAC_state *state, - uint8_t *buf, Py_ssize_t len) + uint8_t *buf, uint32_t len) { - assert(len >= 0); - Py_CHECK_HACL_UINT32_T_LENGTH(len); #ifndef NDEBUG - hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, (uint32_t)len); + hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, len); return _hacl_convert_errno(code); #else - (void)Hacl_Streaming_HMAC_update(state, buf, (uint32_t)len); + (void)Hacl_Streaming_HMAC_update(state, buf, len); return 0; #endif } From c048975cd32b714b18f3e5ad6f750113ac24e622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Jun 2025 11:48:26 +0200 Subject: [PATCH 35/41] sync --- Modules/hmacmodule.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f28e14892b2b00..7f32bb42744fc2 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -390,9 +390,11 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) static int _hacl_convert_errno(hacl_errno_t code) { + assert(PyGILState_GetThisThreadState() != NULL); if (code == Hacl_Streaming_Types_Success) { return 0; } + PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { case Hacl_Streaming_Types_InvalidAlgorithm: { From 
c9044d26ea9ae5be780bd21128a66ec0fadcd6ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Jun 2025 11:49:00 +0200 Subject: [PATCH 36/41] fixup format string --- Modules/hmacmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 7f32bb42744fc2..0e5f7c852dbbfe 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -415,7 +415,7 @@ _hacl_convert_errno(hacl_errno_t code) } default: { PyErr_Format(PyExc_RuntimeError, - "HACL* internal routine failed with error code: %d", + "HACL* internal routine failed with error code: %u", code); break; } From 3849f2d05b3e8e9cb71f90cecb2bb390f4d1fb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Jun 2025 12:48:29 +0200 Subject: [PATCH 37/41] pformat clinic directives --- Modules/sha3module.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 9a7115843c0d28..0ae6d101c317cd 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -71,14 +71,14 @@ typedef struct { /*[clinic input] module _sha3 -class _sha3.sha3_224 "SHA3object *" "clinic_state()->sha3_224_type" -class _sha3.sha3_256 "SHA3object *" "clinic_state()->sha3_256_type" -class _sha3.sha3_384 "SHA3object *" "clinic_state()->sha3_384_type" -class _sha3.sha3_512 "SHA3object *" "clinic_state()->sha3_512_type" -class _sha3.shake_128 "SHA3object *" "clinic_state()->shake_128_type" -class _sha3.shake_256 "SHA3object *" "clinic_state()->shake_256_type" +class _sha3.sha3_224 "SHA3object *" "clinic_state()->sha3_224_type" +class _sha3.sha3_256 "SHA3object *" "clinic_state()->sha3_256_type" +class _sha3.sha3_384 "SHA3object *" "clinic_state()->sha3_384_type" +class _sha3.sha3_512 "SHA3object *" "clinic_state()->sha3_512_type" +class _sha3.shake_128 "SHA3object *" 
"clinic_state()->shake_128_type" +class _sha3.shake_256 "SHA3object *" "clinic_state()->shake_256_type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=83376ec869f33016]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=91cc5a9fb4be1976]*/ #define clinic_state() (get_sha3module_state_by_cls(Py_TYPE(self))) #include "clinic/sha3module.c.h" From 6c08f0d2fb15f8ec9ac6568851e683bd8275ad36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Jun 2025 18:20:10 +0200 Subject: [PATCH 38/41] address review --- Modules/_hashopenssl.c | 4 +-- Modules/blake2module.c | 2 +- Modules/hashlib.h | 55 +++++++++++++++++++++++++++--------------- Modules/hmacmodule.c | 2 +- Modules/md5module.c | 2 +- Modules/sha1module.c | 2 +- Modules/sha2module.c | 4 +-- Modules/sha3module.c | 2 +- 8 files changed, 44 insertions(+), 29 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 3f454578411d26..4ca5b5698cff3c 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -278,14 +278,14 @@ get_hashlib_state(PyObject *module) } typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ } HASHobject; #define HASHobject_CAST(op) ((HASHobject *)(op)) typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD HMAC_CTX *ctx; /* OpenSSL hmac context */ } HMACobject; diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 199b01e5b5c1ca..295bca07650916 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -352,7 +352,7 @@ type_to_impl(PyTypeObject *type) } typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; diff --git a/Modules/hashlib.h b/Modules/hashlib.h index bb7e50ed6fc794..5800772a374440 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ 
-3,9 +3,9 @@ #include "pycore_lock.h" // PyMutex /* - * Given a PyObject* obj, fill in the Py_buffer* viewp with the result - * of PyObject_GetBuffer. Sets an exception and issues the erraction - * on any errors, e.g. 'return NULL' or 'goto error'. + * Given a PyObject* 'obj', fill in the Py_buffer* 'viewp' with the result + * of PyObject_GetBuffer. Sets an exception and issues the 'erraction' + * on any errors, e.g., 'return NULL' or 'goto error'. */ #define GET_BUFFER_VIEW_OR_ERROR(obj, viewp, erraction) do { \ if (PyUnicode_Check((obj))) { \ @@ -34,36 +34,46 @@ /* * Helper code to synchronize access to the hash object when the GIL is - * released around a CPU consuming hashlib operation. All code paths that - * access a mutable part of obj must be enclosed in an ENTER_HASHLIB / - * LEAVE_HASHLIB block or explicitly acquire and release the lock inside - * a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for - * an operation. + * released around a CPU consuming hashlib operation. * - * These only drop the GIL if the lock acquisition itself is likely to - * block. Thus the non-blocking acquire gating the GIL release for a - * blocking lock acquisition. The intent of these macros is to surround - * the assumed always "fast" operations that you aren't releasing the - * GIL around. Otherwise use code similar to what you see in hash - * function update() methods. + * Code accessing a mutable part of the hash object must be enclosed in + * an HASHLIB_{ACQUIRE,RELEASE}_LOCK block or explicitly acquire and release + * the mutex inside a Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block if + * they wish to release the GIL for an operation. */ -#include "pythread.h" - -#define PyObject_HASHLIB_HEAD \ +#define HASHLIB_OBJECT_HEAD \ PyObject_HEAD \ /* Guard against race conditions during incremental update(). 
*/ \ PyMutex mutex; -#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex) -#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex) - #define HASHLIB_INIT_MUTEX(OBJ) \ do { \ (OBJ)->mutex = (PyMutex){0}; \ } while (0) +#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex) +#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex) + +/* + * Message length above which the GIL is to be released + * when performing hashing operations. + */ #define HASHLIB_GIL_MINSIZE 2048 + +// Macros for executing code while conditionally holding the GIL. +// +// These only drop the GIL if the lock acquisition itself is likely to +// block. Thus the non-blocking acquire gating the GIL release for a +// blocking lock acquisition. The intent of these macros is to surround +// the assumed always "fast" operations that you aren't releasing the +// GIL around. + +/* + * Execute a suite of C statements 'STATEMENTS'. + * + * The GIL is held if 'SIZE' is below the HASHLIB_GIL_MINSIZE threshold. + */ #define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS) \ do { \ if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ @@ -76,6 +86,11 @@ } \ } while (0) +/* + * Lock 'OBJ' and execute a suite of C statements 'STATEMENTS'. + * + * The GIL is held if 'SIZE' is below the HASHLIB_GIL_MINSIZE threshold. 
+ */ #define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \ do { \ if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 0e5f7c852dbbfe..fa00a481918b5d 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -283,7 +283,7 @@ get_hmacmodule_state_by_cls(PyTypeObject *cls) typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; typedef struct HMACObject { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD // Hash function information PyObject *name; // rendered name (exact unicode object) HMAC_Hash_Kind kind; // can be used for runtime dispatch (must be known) diff --git a/Modules/md5module.c b/Modules/md5module.c index e3398541189411..d5f66751258d08 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -38,7 +38,7 @@ class MD5Type "MD5object *" "&PyType_Type" #include "_hacl/Hacl_Hash_MD5.h" typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD Hacl_Hash_MD5_state_t *hash_state; } MD5object; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index cb7bcd5a078326..251fb089a62860 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -38,7 +38,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" #include "_hacl/Hacl_Hash_SHA1.h" typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD Hacl_Hash_SHA1_state_t *hash_state; } SHA1object; diff --git a/Modules/sha2module.c b/Modules/sha2module.c index f4c4ba3254849b..51dbdc7b7b6823 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -50,13 +50,13 @@ class SHA512Type "SHA512object *" "&PyType_Type" // TODO: Get rid of int digestsize in favor of Hacl state info? 
typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD int digestsize; Hacl_Hash_SHA2_state_t_256 *state; } SHA256object; typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD int digestsize; Hacl_Hash_SHA2_state_t_512 *state; } SHA512object; diff --git a/Modules/sha3module.c b/Modules/sha3module.c index f9983fe509b36c..ee7d33116c10e4 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -59,7 +59,7 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" #include "_hacl/Hacl_Hash_SHA3.h" typedef struct { - PyObject_HASHLIB_HEAD + HASHLIB_OBJECT_HEAD Hacl_Hash_SHA3_state_t *hash_state; } SHA3object; From 7fd139625b7b63dd664bf9694969577e9175a44a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 20 Jun 2025 09:57:26 +0200 Subject: [PATCH 39/41] reduce diff --- Modules/md5module.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index d5f66751258d08..b64cbdaf86673e 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -22,7 +22,6 @@ #include "Python.h" #include "hashlib.h" -#include "pycore_strhex.h" // _Py_strhex() /*[clinic input] module _md5 @@ -133,7 +132,7 @@ static PyObject * MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { - uint8_t digest[MD5_DIGESTSIZE]; + unsigned char digest[MD5_DIGESTSIZE]; HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); HASHLIB_RELEASE_LOCK(self); @@ -150,11 +149,20 @@ static PyObject * MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { - uint8_t digest[MD5_DIGESTSIZE]; + unsigned char digest[MD5_DIGESTSIZE]; HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); HASHLIB_RELEASE_LOCK(self); - return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); + + const char 
*hexdigits = "0123456789abcdef"; + char digest_hex[MD5_DIGESTSIZE * 2]; + char *str = digest_hex; + for (size_t i=0; i < MD5_DIGESTSIZE; i++) { + unsigned char byte = digest[i]; + *str++ = hexdigits[byte >> 4]; + *str++ = hexdigits[byte & 0x0f]; + } + return PyUnicode_FromStringAndSize(digest_hex, sizeof(digest_hex)); } static void From d0cfb7ccde1aa80a0e5a1c28521509f56d122745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 17:57:57 +0200 Subject: [PATCH 40/41] post-merge --- Modules/hashlib.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 178b78217bf1eb..5800772a374440 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -2,14 +2,6 @@ #include "pycore_lock.h" // PyMutex -#define HASHLIB_MUTEX_API \ - /* - * Attributes to prevent undefined behaviors - * via multiple threads entering the C API. - */ \ - bool use_mutex; \ - PyMutex mutex; - /* * Given a PyObject* 'obj', fill in the Py_buffer* 'viewp' with the result * of PyObject_GetBuffer. 
Sets an exception and issues the 'erraction' From c86687f514ab0ce64f50301a49e3b9316cecdbd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 18:01:15 +0200 Subject: [PATCH 41/41] diff reduce --- Modules/hmacmodule.c | 1 - Modules/md5module.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 7e5a0460919ca4..bf47f28d081f9a 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -284,7 +284,6 @@ typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; typedef struct HMACObject { HASHLIB_OBJECT_HEAD - // Hash function information PyObject *name; // rendered name (exact unicode object) HMAC_Hash_Kind kind; // can be used for runtime dispatch (must be known) diff --git a/Modules/md5module.c b/Modules/md5module.c index 03e7ecbb817e47..3007bea8e87146 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -178,12 +178,12 @@ static void _hacl_md5_state_update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) { - assert(len >= 0); /* * Note: we explicitly ignore the error code on the basis that it would * take more than 1 billion years to overflow the maximum admissible length * for MD5 (2^61 - 1). */ + assert(len >= 0); #if PY_SSIZE_T_MAX > UINT32_MAX while (len > UINT32_MAX) { (void)Hacl_Hash_MD5_update(state, buf, UINT32_MAX);