From a94db65556df426473e06a95bae925c5c4ff3f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 25 Jan 2025 11:42:24 +0100 Subject: [PATCH 1/3] fix UBSan failures for `json::PyScannerObject` --- Modules/_json.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index a99abbe72bf7a0..1d1daeb24093ee 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -27,6 +27,8 @@ typedef struct _PyScannerObject { PyObject *parse_constant; } PyScannerObject; +#define _PyScannerObject_CAST(op) ((PyScannerObject *)(op)) + static PyMemberDef scanner_members[] = { {"strict", Py_T_BOOL, offsetof(PyScannerObject, strict), Py_READONLY, "strict"}, {"object_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_hook), Py_READONLY, "object_hook"}, @@ -69,6 +71,7 @@ static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr); + static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * @@ -78,7 +81,8 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static void scanner_dealloc(PyObject *self); static int -scanner_clear(PyScannerObject *self); +scanner_clear(PyObject *self); + static PyObject * encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static void @@ -617,14 +621,15 @@ scanner_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(self); - scanner_clear((PyScannerObject *)self); + (void)scanner_clear(self); tp->tp_free(self); Py_DECREF(tp); } static int -scanner_traverse(PyScannerObject *self, visitproc visit, void *arg) +scanner_traverse(PyObject *op, visitproc visit, void *arg) { + PyScannerObject *self = _PyScannerObject_CAST(op); Py_VISIT(Py_TYPE(self)); Py_VISIT(self->object_hook); Py_VISIT(self->object_pairs_hook); @@ -635,8 +640,9 @@ scanner_traverse(PyScannerObject *self, visitproc visit, void *arg) } static int -scanner_clear(PyScannerObject *self) +scanner_clear(PyObject *op) { + PyScannerObject *self = _PyScannerObject_CAST(op); Py_CLEAR(self->object_hook); Py_CLEAR(self->object_pairs_hook); Py_CLEAR(self->parse_float); @@ -1106,7 +1112,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_ } static PyObject * -scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds) +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to scan_once_{str,unicode} */ PyObject *pystr; @@ -1128,7 +1134,8 @@ scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds) if (memo == NULL) { return NULL; } - rval = scan_once_unicode(self, memo, pystr, idx, &next_idx); + rval = scan_once_unicode(_PyScannerObject_CAST(self), + memo, pystr, idx, &next_idx); Py_DECREF(memo); if (rval == NULL) return NULL; From 2f4139416ce0c1c6242786e85371fa3cb3600ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 25 Jan 2025 11:44:22 +0100 Subject: [PATCH 2/3] fix UBSan failures for `json::PyEncoderObject` --- Modules/_json.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 1d1daeb24093ee..e31dd2e82ce431 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -53,6 +53,8 @@ typedef struct _PyEncoderObject { PyCFunction fast_encode; } PyEncoderObject; +#define _PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) + static PyMemberDef encoder_members[] = { {"markers", _Py_T_OBJECT, offsetof(PyEncoderObject, markers), Py_READONLY, "markers"}, {"default", _Py_T_OBJECT, offsetof(PyEncoderObject, defaultfn), Py_READONLY, "default"}, @@ -88,7 +90,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static void encoder_dealloc(PyObject *self); static int -encoder_clear(PyEncoderObject *self); +encoder_clear(PyObject *self); static int encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache); static int @@ -1250,8 +1252,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); - if (f == (PyCFunction)py_encode_basestring_ascii || - f == (PyCFunction)py_encode_basestring) { + if (f == py_encode_basestring_ascii || f == py_encode_basestring) { s->fast_encode = f; } } @@ -1346,12 +1347,13 @@ write_newline_indent(PyUnicodeWriter *writer, static PyObject * -encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) +encoder_call(PyObject *op, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; Py_ssize_t indent_level; + PyEncoderObject *self = _PyEncoderObject_CAST(op); if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, &obj, &indent_level)) @@ -1823,14 +1825,15 @@ encoder_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(self); - encoder_clear((PyEncoderObject *)self); + (void)encoder_clear(self); tp->tp_free(self); Py_DECREF(tp); } static int -encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg) +encoder_traverse(PyObject *op, visitproc visit, void *arg) { + PyEncoderObject *self = _PyEncoderObject_CAST(op); Py_VISIT(Py_TYPE(self)); Py_VISIT(self->markers); Py_VISIT(self->defaultfn); @@ -1842,8 +1845,9 @@ encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg) } static int -encoder_clear(PyEncoderObject *self) +encoder_clear(PyObject *op) { + PyEncoderObject *self = _PyEncoderObject_CAST(op); /* Deallocate Encoder */ Py_CLEAR(self->markers); Py_CLEAR(self->defaultfn); @@ -1877,15 +1881,15 @@ static PyType_Spec PyEncoderType_spec = { static PyMethodDef speedups_methods[] = { {"encode_basestring_ascii", - (PyCFunction)py_encode_basestring_ascii, + py_encode_basestring_ascii, METH_O, pydoc_encode_basestring_ascii}, {"encode_basestring", - (PyCFunction)py_encode_basestring, + py_encode_basestring, METH_O, pydoc_encode_basestring}, {"scanstring", - (PyCFunction)py_scanstring, + py_scanstring, METH_VARARGS, pydoc_scanstring}, {NULL, NULL, 0, NULL} From e199b8b4e83201164b6e0ca1487717b670be0c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 8 Feb 2025 10:10:48 +0100 Subject: [PATCH 3/3] Do not use `_` + capital letter in new code as it is also UB. --- Modules/_json.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 0ca0edf3d75d0c..7b3328a4c9145d 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -27,7 +27,7 @@ typedef struct _PyScannerObject { PyObject *parse_constant; } PyScannerObject; -#define _PyScannerObject_CAST(op) ((PyScannerObject *)(op)) +#define PyScannerObject_CAST(op) ((PyScannerObject *)(op)) static PyMemberDef scanner_members[] = { {"strict", Py_T_BOOL, offsetof(PyScannerObject, strict), Py_READONLY, "strict"}, @@ -53,7 +53,7 @@ typedef struct _PyEncoderObject { PyCFunction fast_encode; } PyEncoderObject; -#define _PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) +#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) static PyMemberDef encoder_members[] = { {"markers", _Py_T_OBJECT, offsetof(PyEncoderObject, markers), Py_READONLY, "markers"}, @@ -640,7 +640,7 @@ scanner_dealloc(PyObject *self) static int scanner_traverse(PyObject *op, visitproc visit, void *arg) { - PyScannerObject *self = _PyScannerObject_CAST(op); + PyScannerObject *self = PyScannerObject_CAST(op); Py_VISIT(Py_TYPE(self)); Py_VISIT(self->object_hook); Py_VISIT(self->object_pairs_hook); @@ -653,7 +653,7 @@ scanner_traverse(PyObject *op, visitproc visit, void *arg) static int scanner_clear(PyObject *op) { - PyScannerObject *self = _PyScannerObject_CAST(op); + PyScannerObject *self = PyScannerObject_CAST(op); Py_CLEAR(self->object_hook); Py_CLEAR(self->object_pairs_hook); Py_CLEAR(self->parse_float); @@ -1145,7 +1145,7 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) if (memo == NULL) { return NULL; } - rval = scan_once_unicode(_PyScannerObject_CAST(self), + rval = scan_once_unicode(PyScannerObject_CAST(self), memo, pystr, idx, &next_idx); Py_DECREF(memo); if (rval == NULL) @@ -1362,7 +1362,7 @@ encoder_call(PyObject *op, PyObject *args, PyObject *kwds) static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; Py_ssize_t indent_level; - PyEncoderObject *self = _PyEncoderObject_CAST(op); + PyEncoderObject *self = PyEncoderObject_CAST(op); if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, &obj, &indent_level)) @@ -1842,7 +1842,7 @@ encoder_dealloc(PyObject *self) static int encoder_traverse(PyObject *op, visitproc visit, void *arg) { - PyEncoderObject *self = _PyEncoderObject_CAST(op); + PyEncoderObject *self = PyEncoderObject_CAST(op); Py_VISIT(Py_TYPE(self)); Py_VISIT(self->markers); Py_VISIT(self->defaultfn); @@ -1856,7 +1856,7 @@ encoder_traverse(PyObject *op, visitproc visit, void *arg) static int encoder_clear(PyObject *op) { - PyEncoderObject *self = _PyEncoderObject_CAST(op); + PyEncoderObject *self = PyEncoderObject_CAST(op); /* Deallocate Encoder */ Py_CLEAR(self->markers); Py_CLEAR(self->defaultfn);