From 2e9b3be3d3136b0fcd194e177f55f1bcf28fc4f6 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 15 Feb 2023 10:25:47 -0500 Subject: [PATCH 1/2] MAINT Use newest NumPy C API in tree._criterion --- setup.py | 1 + sklearn/tree/_criterion.pyx | 23 ++++++++++------------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/setup.py b/setup.py index 21734a8eeddba..a6b6e33afd7b0 100755 --- a/setup.py +++ b/setup.py @@ -110,6 +110,7 @@ "sklearn.svm._libsvm", "sklearn.svm._libsvm_sparse", "sklearn.svm._newrand", + "sklearn.tree._criterion", "sklearn.tree._splitter", "sklearn.tree._tree", "sklearn.tree._utils", diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index e9f32b6e06ef9..3346ff0edc164 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -950,11 +950,8 @@ cdef class MAE(RegressionCriterion): self.weighted_n_samples = weighted_n_samples self.weighted_n_node_samples = 0. - cdef void** left_child - cdef void** right_child - - left_child = <void**> self.left_child.data - right_child = <void**> self.right_child.data + cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) + cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) for k in range(self.n_outputs): (<WeightedMedianCalculator> left_child[k]).reset() @@ -991,8 +988,8 @@ cdef class MAE(RegressionCriterion): cdef DOUBLE_t value cdef DOUBLE_t weight - cdef void** left_child = <void**> self.left_child.data - cdef void** right_child = <void**> self.right_child.data + cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) + cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) self.weighted_n_left = 0.0 self.weighted_n_right = self.weighted_n_node_samples @@ -1024,8 +1021,8 @@ cdef class MAE(RegressionCriterion): cdef DOUBLE_t value cdef DOUBLE_t weight - cdef void** left_child = <void**> self.left_child.data - cdef void** right_child = <void**> self.right_child.data + cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) + cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) # reverse reset the 
WeightedMedianCalculators, right should have no # elements and left should have all elements. @@ -1049,8 +1046,8 @@ cdef class MAE(RegressionCriterion): cdef const DOUBLE_t[:] sample_weight = self.sample_weight cdef const SIZE_t[:] sample_indices = self.sample_indices - cdef void** left_child = <void**> self.left_child.data - cdef void** right_child = <void**> self.right_child.data + cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) + cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) cdef SIZE_t pos = self.pos cdef SIZE_t end = self.end @@ -1147,8 +1144,8 @@ cdef class MAE(RegressionCriterion): cdef DOUBLE_t impurity_left = 0.0 cdef DOUBLE_t impurity_right = 0.0 - cdef void** left_child = <void**> self.left_child.data - cdef void** right_child = <void**> self.right_child.data + cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) + cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) for k in range(self.n_outputs): median = (<WeightedMedianCalculator> left_child[k]).get_median() From abdd8e57c3a96dacd5cfbb8182999ecbb8cc6f43 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 15 Feb 2023 17:14:46 -0500 Subject: [PATCH 2/2] FIX Use pointer for children --- sklearn/tree/_criterion.pyx | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 3346ff0edc164..67bf466d91254 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -889,6 +889,8 @@ cdef class MAE(RegressionCriterion): cdef cnp.ndarray left_child cdef cnp.ndarray right_child + cdef void** left_child_ptr + cdef void** right_child_ptr cdef DOUBLE_t[::1] node_medians def __cinit__(self, SIZE_t n_outputs, SIZE_t n_samples): @@ -923,6 +925,9 @@ cdef class MAE(RegressionCriterion): self.left_child[k] = WeightedMedianCalculator(n_samples) self.right_child[k] = WeightedMedianCalculator(n_samples) + self.left_child_ptr = <void**> cnp.PyArray_DATA(self.left_child) + self.right_child_ptr = <void**> cnp.PyArray_DATA(self.right_child) + cdef int init( self, const DOUBLE_t[:, ::1] y, @@ -950,8 +955,8 @@ cdef class MAE(RegressionCriterion): self.weighted_n_samples = weighted_n_samples self.weighted_n_node_samples = 0. 
- cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) - cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) + cdef void** left_child = self.left_child_ptr + cdef void** right_child = self.right_child_ptr for k in range(self.n_outputs): (<WeightedMedianCalculator> left_child[k]).reset() @@ -988,8 +993,8 @@ cdef class MAE(RegressionCriterion): cdef DOUBLE_t value cdef DOUBLE_t weight - cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) - cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) + cdef void** left_child = self.left_child_ptr + cdef void** right_child = self.right_child_ptr self.weighted_n_left = 0.0 self.weighted_n_right = self.weighted_n_node_samples @@ -1021,8 +1026,8 @@ cdef class MAE(RegressionCriterion): cdef DOUBLE_t value cdef DOUBLE_t weight - cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) - cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) + cdef void** left_child = self.left_child_ptr + cdef void** right_child = self.right_child_ptr # reverse reset the WeightedMedianCalculators, right should have no # elements and left should have all elements. @@ -1046,8 +1051,8 @@ cdef class MAE(RegressionCriterion): cdef const DOUBLE_t[:] sample_weight = self.sample_weight cdef const SIZE_t[:] sample_indices = self.sample_indices - cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) - cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) + cdef void** left_child = self.left_child_ptr + cdef void** right_child = self.right_child_ptr cdef SIZE_t pos = self.pos cdef SIZE_t end = self.end @@ -1144,8 +1149,8 @@ cdef class MAE(RegressionCriterion): cdef DOUBLE_t impurity_left = 0.0 cdef DOUBLE_t impurity_right = 0.0 - cdef void** left_child = <void**> cnp.PyArray_DATA(self.left_child) - cdef void** right_child = <void**> cnp.PyArray_DATA(self.right_child) + cdef void** left_child = self.left_child_ptr + cdef void** right_child = self.right_child_ptr for k in range(self.n_outputs): median = (<WeightedMedianCalculator> left_child[k]).get_median()