From ec9c0252db71dce8e738d0e2209c5b72156724e8 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sat, 15 Jul 2023 10:45:20 -0500 Subject: [PATCH 1/6] BUG: fix choose refcount leak * Fixes #22683 * use `copyswap` to avoid the reference count leaking reported above when `np.choose` is used with `out` * my impression from the ticket is that Sebastian doesn't think `copyswap` is a perfect solution, but may suffice short-term? --- numpy/core/src/multiarray/item_selection.c | 8 +++++--- numpy/core/tests/test_multiarray.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index f42ae7c2d0d8..aec68418fa4f 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -962,7 +962,8 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, { PyArrayObject *obj = NULL; PyArray_Descr *dtype; - int n, elsize; + PyArray_CopySwapFunc *copyswap; + int n, elsize, swap; npy_intp i; char *ret_data; PyArrayObject **mps, *ap; @@ -1042,6 +1043,8 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, } elsize = PyArray_DESCR(obj)->elsize; ret_data = PyArray_DATA(obj); + copyswap = dtype->f->copyswap; + swap = !PyArray_ISNBO(dtype->byteorder); while (PyArray_MultiIter_NOTDONE(multi)) { mi = *((npy_intp *)PyArray_MultiIter_DATA(multi, n)); @@ -1074,12 +1077,11 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, break; } } - memmove(ret_data, PyArray_MultiIter_DATA(multi, mi), elsize); + copyswap(ret_data, PyArray_MultiIter_DATA(multi, mi), swap, NULL); ret_data += elsize; PyArray_MultiIter_NEXT(multi); } - PyArray_INCREF(obj); Py_DECREF(multi); for (i = 0; i < n; i++) { Py_XDECREF(mps[i]); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 514d271f0f6b..f4c472dd7282 100644 --- a/numpy/core/tests/test_multiarray.py +++ 
b/numpy/core/tests/test_multiarray.py @@ -10031,3 +10031,13 @@ def test_argsort_int(N, dtype): arr = rnd.randint(low=minv, high=maxv, size=N, dtype=dtype) arr[N-1] = maxv assert_arg_sorted(arr, np.argsort(arr, kind='quick')) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_gh_22683(): + a = np.ones(10000, dtype=object) + refc_start = sys.getrefcount(1) + np.choose(np.zeros(10000, dtype=int), [a], out=a) + np.choose(np.zeros(10000, dtype=int), [a], out=a) + refc_end = sys.getrefcount(1) + assert refc_end - refc_start < 10 From 6e41cd6ccbfdd6a8518ab98738feedaf799bf9cd Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 18 Jul 2023 14:47:00 -0600 Subject: [PATCH 2/6] BUG: PR 24188 revisions * remove copyswap, carefully controlling the reference counting to pass the testsuite --- numpy/core/src/multiarray/item_selection.c | 30 ++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index aec68418fa4f..79a647421f1d 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -962,8 +962,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, { PyArrayObject *obj = NULL; PyArray_Descr *dtype; - PyArray_CopySwapFunc *copyswap; - int n, elsize, swap; + int n, elsize; npy_intp i; char *ret_data; PyArrayObject **mps, *ap; @@ -1043,8 +1042,6 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, } elsize = PyArray_DESCR(obj)->elsize; ret_data = PyArray_DATA(obj); - copyswap = dtype->f->copyswap; - swap = !PyArray_ISNBO(dtype->byteorder); while (PyArray_MultiIter_NOTDONE(multi)) { mi = *((npy_intp *)PyArray_MultiIter_DATA(multi, n)); @@ -1077,11 +1074,34 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, break; } } - copyswap(ret_data, PyArray_MultiIter_DATA(multi, mi), swap, NULL); + if (out != NULL) { + char *args[2] = 
{PyArray_MultiIter_DATA(multi, mi), ret_data}; + npy_intp transfer_strides[2] = {elsize, elsize}; + npy_intp one = 1; + NPY_ARRAYMETHOD_FLAGS transfer_flags = 0; + NPY_cast_info cast_info = {.func = NULL}; + PyArrayIterObject *ind_it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)out); + int is_aligned = IsUintAligned(ind_it->ao); + PyArray_GetDTypeTransferFunction( + is_aligned, + PyArray_DESCR(mps[0])->elsize, + PyArray_DESCR(obj)->elsize, + PyArray_DESCR(mps[0]), + PyArray_DESCR(obj), 0, &cast_info, + &transfer_flags); + cast_info.func(&cast_info.context, args, &one, + transfer_strides, cast_info.auxdata); + } + else { + memmove(ret_data, PyArray_MultiIter_DATA(multi, mi), elsize); + } ret_data += elsize; PyArray_MultiIter_NEXT(multi); } + if (out == NULL) { + PyArray_INCREF(obj); + } Py_DECREF(multi); for (i = 0; i < n; i++) { Py_XDECREF(mps[i]); From fbf8dbca92e4e0afbfec39d45f7f077281ad0220 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 25 Jul 2023 16:44:20 -0600 Subject: [PATCH 3/6] MAINT: PR 24188 revisions * hoist the special `out` handling code out of the inner loop (to the degree the testsuite allowed me to) * add a missing `NPY_cast_info_xfree` * adjust the regression test such that it fails before/passes after on both Python 3.11 and 3.12 beta 4, to deal with PEP 683 --- numpy/core/src/multiarray/item_selection.c | 31 +++++++++++++--------- numpy/core/tests/test_multiarray.py | 7 ++--- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 79a647421f1d..7b2f38a1a19c 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1042,6 +1042,21 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, } elsize = PyArray_DESCR(obj)->elsize; ret_data = PyArray_DATA(obj); + npy_intp transfer_strides[2] = {elsize, elsize}; + npy_intp one = 1; + NPY_ARRAYMETHOD_FLAGS transfer_flags = 0; + 
NPY_cast_info cast_info = {.func = NULL}; + if (out != NULL) { + PyArrayIterObject *ind_it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)out); + int is_aligned = IsUintAligned(ind_it->ao); + PyArray_GetDTypeTransferFunction( + is_aligned, + PyArray_DESCR(mps[0])->elsize, + PyArray_DESCR(obj)->elsize, + PyArray_DESCR(mps[0]), + PyArray_DESCR(obj), 0, &cast_info, + &transfer_flags); + } while (PyArray_MultiIter_NOTDONE(multi)) { mi = *((npy_intp *)PyArray_MultiIter_DATA(multi, n)); @@ -1076,19 +1091,6 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, } if (out != NULL) { char *args[2] = {PyArray_MultiIter_DATA(multi, mi), ret_data}; - npy_intp transfer_strides[2] = {elsize, elsize}; - npy_intp one = 1; - NPY_ARRAYMETHOD_FLAGS transfer_flags = 0; - NPY_cast_info cast_info = {.func = NULL}; - PyArrayIterObject *ind_it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)out); - int is_aligned = IsUintAligned(ind_it->ao); - PyArray_GetDTypeTransferFunction( - is_aligned, - PyArray_DESCR(mps[0])->elsize, - PyArray_DESCR(obj)->elsize, - PyArray_DESCR(mps[0]), - PyArray_DESCR(obj), 0, &cast_info, - &transfer_flags); cast_info.func(&cast_info.context, args, &one, transfer_strides, cast_info.auxdata); } @@ -1102,6 +1104,9 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, if (out == NULL) { PyArray_INCREF(obj); } + else { + NPY_cast_info_xfree(&cast_info); + } Py_DECREF(multi); for (i = 0; i < n; i++) { Py_XDECREF(mps[i]); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index f4c472dd7282..869ebe4d8ac7 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -10035,9 +10035,10 @@ def test_argsort_int(N, dtype): @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") def test_gh_22683(): - a = np.ones(10000, dtype=object) - refc_start = sys.getrefcount(1) + b = 777.68760986 + a = np.array([b] * 10000, dtype=object) + refc_start = 
sys.getrefcount(b) np.choose(np.zeros(10000, dtype=int), [a], out=a) np.choose(np.zeros(10000, dtype=int), [a], out=a) - refc_end = sys.getrefcount(1) + refc_end = sys.getrefcount(b) assert refc_end - refc_start < 10 From e4b880e494ee0a0125ed01ed61b5b2e24802fa48 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 31 Jul 2023 11:58:43 +0200 Subject: [PATCH 4/6] MAINT: Use explicit copy path in choose based on refcheck --- numpy/core/src/multiarray/item_selection.c | 26 ++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 7b2f38a1a19c..29175c4844fa 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1046,15 +1046,15 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, npy_intp one = 1; NPY_ARRAYMETHOD_FLAGS transfer_flags = 0; NPY_cast_info cast_info = {.func = NULL}; - if (out != NULL) { + if (PyDataType_REFCHK(dtype)) { PyArrayIterObject *ind_it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)out); int is_aligned = IsUintAligned(ind_it->ao); PyArray_GetDTypeTransferFunction( is_aligned, - PyArray_DESCR(mps[0])->elsize, - PyArray_DESCR(obj)->elsize, - PyArray_DESCR(mps[0]), - PyArray_DESCR(obj), 0, &cast_info, + dtype->elsize, + dtype->elsize, + dtype, + dtype, 0, &cast_info, &transfer_flags); } @@ -1089,10 +1089,12 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, break; } } - if (out != NULL) { + if (cast_info.func != NULL) { char *args[2] = {PyArray_MultiIter_DATA(multi, mi), ret_data}; - cast_info.func(&cast_info.context, args, &one, - transfer_strides, cast_info.auxdata); + if (cast_info.func(&cast_info.context, args, &one, + transfer_strides, cast_info.auxdata) < 0) { + goto fail; + } } else { memmove(ret_data, PyArray_MultiIter_DATA(multi, mi), elsize); @@ -1101,12 +1103,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, 
PyArrayObject *out, PyArray_MultiIter_NEXT(multi); } - if (out == NULL) { - PyArray_INCREF(obj); - } - else { - NPY_cast_info_xfree(&cast_info); - } + NPY_cast_info_xfree(&cast_info); Py_DECREF(multi); for (i = 0; i < n; i++) { Py_XDECREF(mps[i]); @@ -1122,6 +1119,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, return (PyObject *)obj; fail: + NPY_cast_info_xfree(&cast_info); Py_XDECREF(multi); for (i = 0; i < n; i++) { Py_XDECREF(mps[i]); From 1c22bf76e6926d2ce84fba4a3241f6de5d64b76e Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 31 Jul 2023 12:16:43 +0200 Subject: [PATCH 5/6] BUG: Remove unnecessary (and now also segfaulting) iterator creation Also hoist the `dtype` definition up and use it. --- numpy/core/src/multiarray/item_selection.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 29175c4844fa..e3cf1e471109 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -993,9 +993,10 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, if (multi == NULL) { goto fail; } + dtype = PyArray_DESCR(mps[0]); + /* Set-up return array */ if (out == NULL) { - dtype = PyArray_DESCR(mps[0]); Py_INCREF(dtype); obj = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(ap), dtype, @@ -1032,7 +1033,6 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, */ flags |= NPY_ARRAY_ENSURECOPY; } - dtype = PyArray_DESCR(mps[0]); Py_INCREF(dtype); obj = (PyArrayObject *)PyArray_FromArray(out, dtype, flags); } @@ -1040,15 +1040,14 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, if (obj == NULL) { goto fail; } - elsize = PyArray_DESCR(obj)->elsize; + elsize = dtype->elsize; ret_data = PyArray_DATA(obj); npy_intp transfer_strides[2] = {elsize, elsize}; npy_intp one = 1; NPY_ARRAYMETHOD_FLAGS transfer_flags = 0; NPY_cast_info cast_info = 
{.func = NULL}; if (PyDataType_REFCHK(dtype)) { - PyArrayIterObject *ind_it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)out); - int is_aligned = IsUintAligned(ind_it->ao); + int is_aligned = IsUintAligned(obj); PyArray_GetDTypeTransferFunction( is_aligned, dtype->elsize, From 87a93ef9a19bc3e8fd7d9c9150799895d18c99a3 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 31 Jul 2023 12:17:53 +0200 Subject: [PATCH 6/6] ENH: Use `memcpy` and swap order Not sure this makes a difference, but we check for memory overlap so `memmove` isn't necessary and if the compiler keeps the order intact, we want the `memcpy` path to be the hot one. --- numpy/core/src/multiarray/item_selection.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index e3cf1e471109..e935a27edb6c 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1088,16 +1088,17 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, break; } } - if (cast_info.func != NULL) { + if (cast_info.func == NULL) { + /* We ensure memory doesn't overlap, so can use memcpy */ + memcpy(ret_data, PyArray_MultiIter_DATA(multi, mi), elsize); + } + else { char *args[2] = {PyArray_MultiIter_DATA(multi, mi), ret_data}; if (cast_info.func(&cast_info.context, args, &one, transfer_strides, cast_info.auxdata) < 0) { goto fail; } } - else { - memmove(ret_data, PyArray_MultiIter_DATA(multi, mi), elsize); - } ret_data += elsize; PyArray_MultiIter_NEXT(multi); }