From 02672649aa879b0606d889d76476b9d81218b2aa Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 18 Sep 2018 10:21:51 +0300 Subject: [PATCH 1/5] BUG: use cyclic garbage collection semantics in np.frompyfunc --- numpy/core/src/umath/ufunc_object.c | 44 +++++++++++++++++---- numpy/core/src/umath/umathmodule.c | 1 + numpy/lib/tests/test_function_base.py | 56 ++++++++++++++++++++++++++- 3 files changed, 92 insertions(+), 9 deletions(-) diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index ea0007a9d869..ade2b48ed760 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -4892,12 +4892,15 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi return NULL; } - ufunc = PyArray_malloc(sizeof(PyUFuncObject)); + ufunc = PyObject_GC_New(PyUFuncObject, &PyUFunc_Type); + /* + * We use GC_New here for ufunc->obj, but do not use GC_Track since + * ufunc->obj is still NULL at the end of this function. + * See ufunc_frompyfunc where ufunc->obj is set and GC_Track is called. + */ if (ufunc == NULL) { return NULL; } - memset(ufunc, 0, sizeof(PyUFuncObject)); - PyObject_Init((PyObject *)ufunc, &PyUFunc_Type); ufunc->nin = nin; ufunc->nout = nout; @@ -4912,6 +4915,20 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi ufunc->data = data; ufunc->types = types; ufunc->ntypes = ntypes; + ufunc->core_signature = NULL; + ufunc->core_enabled = 0; + ufunc->obj = NULL; + ufunc->core_num_dims = NULL; + ufunc->core_num_dim_ix = 0; + ufunc->core_offsets = NULL; + ufunc->core_dim_ixs = NULL; + ufunc->core_dim_sizes = NULL; + ufunc->core_dim_flags = NULL; + ufunc->userloops = NULL; + ufunc->ptr = NULL; + ufunc->reserved2 = NULL; + ufunc->reserved1 = 0; + ufunc->iter_flags = 0; /* Type resolution and inner loop selection functions */ ufunc->type_resolver = &PyUFunc_DefaultTypeResolver; @@ -5284,11 +5301,14 @@ ufunc_dealloc(PyUFuncObject *ufunc) PyArray_free(ufunc->ptr); PyArray_free(ufunc->op_flags); Py_XDECREF(ufunc->userloops); - Py_XDECREF(ufunc->obj); if (ufunc->identity == PyUFunc_IdentityValue) { Py_DECREF(ufunc->identity_value); } - PyArray_free(ufunc); + if (ufunc->obj != NULL) { + PyObject_GC_UnTrack((PyObject *)ufunc); + Py_DECREF(ufunc->obj); + } + PyObject_GC_Del(ufunc); } static PyObject * @@ -5297,6 +5317,16 @@ ufunc_repr(PyUFuncObject *ufunc) return PyUString_FromFormat("", ufunc->name); } +static int +ufunc_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyUFuncObject *ufunc = (PyUFuncObject*)self; + Py_VISIT(ufunc->obj); + if (ufunc->identity == PyUFunc_IdentityValue) { + Py_VISIT(ufunc->identity_value); + } + return 0; +} /****************************************************************************** *** UFUNC METHODS *** @@ -6013,9 +6043,9 @@ NPY_NO_EXPORT PyTypeObject PyUFunc_Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ - 0, /* tp_traverse */ + (traverseproc)ufunc_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index 8277ad6cc5d3..ae85a7ad52b1 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -161,6 +161,7 @@ ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUS self->type_resolver = &object_ufunc_type_resolver; self->legacy_inner_loop_selector = &object_ufunc_loop_selector; + PyObject_GC_Track(self); return (PyObject *)self; } diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 3d4b0e3b2bff..951176040a4c 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -4,6 +4,7 @@ import warnings import sys import decimal +import types import pytest import numpy as np @@ -24,6 +25,7 @@ from numpy.compat import long +PY2 = sys.version_info[0] == 2 def get_mat(n): data = np.arange(n) @@ -353,9 +355,9 @@ class subclass(np.ndarray): assert_equal(type(np.average(a, weights=w)), subclass) def test_upcasting(self): - types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), + typs = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), ('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')] - for at, wt, rt in types: + for at, wt, rt in typs: a = np.array([[1,2],[3,4]], dtype=at) w = np.array([[1,2],[3,4]], dtype=wt) assert_equal(np.average(a, weights=w).dtype, np.dtype(rt)) @@ -1498,6 +1500,56 @@ def test_size_zero_output(self): f(x) +class TestLeaks(object): + class A(object): + iters = 20 + + def bound(self, *args): + return np.int(0) + + @staticmethod + def unbound(*args): + return np.int(0) + + def npy_bound(self, a): + return types.MethodType(np.frompyfunc(self.bound, 1, 1), a) + + def npy_unbound(self, a): + return np.frompyfunc(self.unbound, 1, 1) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + @pytest.mark.parametrize('func, npy_func, incr', [ + # Test with both an unbound method and a bound one + (A.bound, A.npy_bound, A.iters), + (A.unbound, A.npy_unbound, 0), + ]) + def test_frompyfunc_leaks(self, func, npy_func, incr): + # exposed in gh-11867 as np.vectorized, but the problem stems from + # frompyfunc. + # class.attribute = np.frompyfunc() creates a + # reference cycle that requires a gc collection cycle to break + # (on CPython 3) + import gc + + gc.disable() + try: + refcount = sys.getrefcount(func) + for i in range(self.A.iters): + a = self.A() + a.f = npy_func(a, a) + out = a.f(np.arange(10)) + a = None + if PY2: + assert_equal(sys.getrefcount(func), refcount) + else: + # func is part of a reference cycle + assert_equal(sys.getrefcount(func), refcount + incr) + for i in range(5): + gc.collect() + assert_equal(sys.getrefcount(func), refcount) + finally: + gc.enable() + class TestDigitize(object): def test_forward(self): From f73d96d7aef824f6e6da193f9d9e1a525f3639d1 Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 4 Dec 2018 04:02:46 -0800 Subject: [PATCH 2/5] MAINT: cleanup comments and harmless reference assignment --- numpy/core/include/numpy/ufuncobject.h | 8 ++++++-- numpy/core/src/umath/ufunc_object.c | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 90d837a9b53a..15dcdf010e8d 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -120,7 +120,11 @@ typedef struct _tagPyUFuncObject { */ int nin, nout, nargs; - /* Identity for reduction, either PyUFunc_One or PyUFunc_Zero */ + /* + * Identity for reduction, any of PyUFunc_One, PyUFunc_Zero + * PyUFunc_MinusOne, PyUFunc_None, PyUFunc_ReorderableNone, + * PyUFunc_IdentityValue. + */ int identity; /* Array of one-dimensional core loops */ @@ -301,7 +305,7 @@ typedef struct _tagPyUFuncObject { */ #define PyUFunc_ReorderableNone -2 /* - * UFunc unit is in identity_value, and the order of operations can be reordered + * UFunc unit is an identity_value, and the order of operations can be reordered * This case allows reduction with multiple axes at once. */ #define PyUFunc_IdentityValue -3 diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index ade2b48ed760..9307238512ad 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -4908,8 +4908,10 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi ufunc->identity = identity; if (ufunc->identity == PyUFunc_IdentityValue) { Py_INCREF(identity_value); + ufunc->identity_value = identity_value; + } else { + ufunc->identity_value = NULL; } - ufunc->identity_value = identity_value; ufunc->functions = func; ufunc->data = data; From a3b9f757bc473efe5803e1a164db9d832c43c7c9 Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 18 Dec 2018 13:05:52 +0200 Subject: [PATCH 3/5] MAINT: fixes from review --- numpy/core/src/umath/ufunc_object.c | 11 +++++------ numpy/lib/tests/test_function_base.py | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 9307238512ad..a766ade6cc00 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -5296,6 +5296,7 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, static void ufunc_dealloc(PyUFuncObject *ufunc) { + PyObject_GC_UnTrack((PyObject *)ufunc); PyArray_free(ufunc->core_num_dims); PyArray_free(ufunc->core_dim_ixs); PyArray_free(ufunc->core_offsets); @@ -5307,7 +5308,6 @@ ufunc_dealloc(PyUFuncObject *ufunc) Py_DECREF(ufunc->identity_value); } if (ufunc->obj != NULL) { - PyObject_GC_UnTrack((PyObject *)ufunc); Py_DECREF(ufunc->obj); } PyObject_GC_Del(ufunc); @@ -5320,12 +5320,11 @@ ufunc_repr(PyUFuncObject *ufunc) } static int -ufunc_traverse(PyObject *self, visitproc visit, void *arg) +ufunc_traverse(PyUFuncObject *self, visitproc visit, void *arg) { - PyUFuncObject *ufunc = (PyUFuncObject*)self; - Py_VISIT(ufunc->obj); - if (ufunc->identity == PyUFunc_IdentityValue) { - Py_VISIT(ufunc->identity_value); + Py_VISIT(self->obj); + if (self->identity == PyUFunc_IdentityValue) { + Py_VISIT(self->identity_value); } return 0; } diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 951176040a4c..f5d0da02510f 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1505,11 +1505,11 @@ class A(object): iters = 20 def bound(self, *args): - return np.int(0) + return 0 @staticmethod def unbound(*args): - return np.int(0) + return 0 def npy_bound(self, a): return types.MethodType(np.frompyfunc(self.bound, 1, 1), a) @@ -1527,8 +1527,8 @@ def test_frompyfunc_leaks(self, func, npy_func, incr): # exposed in gh-11867 as np.vectorized, but the problem stems from # frompyfunc. # class.attribute = np.frompyfunc() creates a - # reference cycle that requires a gc collection cycle to break - # (on CPython 3) + # reference cycle if is a bound class method that requires a + # gc collection cycle to break (on CPython 3) import gc gc.disable() From 7474a71bc2841d40de84764e14443de444321be6 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 9 Jan 2019 12:36:49 +0200 Subject: [PATCH 4/5] MAINT: fix from review --- numpy/core/src/umath/ufunc_object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index a766ade6cc00..752df01fc547 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -4909,7 +4909,8 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi if (ufunc->identity == PyUFunc_IdentityValue) { Py_INCREF(identity_value); ufunc->identity_value = identity_value; - } else { + } + else { ufunc->identity_value = NULL; } From 82419c00ed41899990d8a0d332535d246edc4147 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 9 Jan 2019 14:57:25 +0200 Subject: [PATCH 5/5] MAINT: simplify test (seberg) --- numpy/lib/tests/test_function_base.py | 33 +++++++++++---------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index f5d0da02510f..6fa5c7ad90b0 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1511,42 +1511,35 @@ def bound(self, *args): def unbound(*args): return 0 - def npy_bound(self, a): - return types.MethodType(np.frompyfunc(self.bound, 1, 1), a) - - def npy_unbound(self, a): - return np.frompyfunc(self.unbound, 1, 1) - @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") - @pytest.mark.parametrize('func, npy_func, incr', [ - # Test with both an unbound method and a bound one - (A.bound, A.npy_bound, A.iters), - (A.unbound, A.npy_unbound, 0), + @pytest.mark.parametrize('name, incr', [ + ('bound', A.iters), + ('unbound', 0), ]) - def test_frompyfunc_leaks(self, func, npy_func, incr): + def test_frompyfunc_leaks(self, name, incr): # exposed in gh-11867 as np.vectorized, but the problem stems from # frompyfunc. # class.attribute = np.frompyfunc() creates a - # reference cycle if is a bound class method that requires a - # gc collection cycle to break (on CPython 3) + # reference cycle if is a bound class method. It requires a + # gc collection cycle to break the cycle (on CPython 3) import gc - + A_func = getattr(self.A, name) gc.disable() try: - refcount = sys.getrefcount(func) + refcount = sys.getrefcount(A_func) for i in range(self.A.iters): a = self.A() - a.f = npy_func(a, a) + a.f = np.frompyfunc(getattr(a, name), 1, 1) out = a.f(np.arange(10)) a = None if PY2: - assert_equal(sys.getrefcount(func), refcount) + assert_equal(sys.getrefcount(A_func), refcount) else: - # func is part of a reference cycle - assert_equal(sys.getrefcount(func), refcount + incr) + # A.func is part of a reference cycle if incr is non-zero + assert_equal(sys.getrefcount(A_func), refcount + incr) for i in range(5): gc.collect() - assert_equal(sys.getrefcount(func), refcount) + assert_equal(sys.getrefcount(A_func), refcount) finally: gc.enable()