diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd
index 1cbd395af8e37..e4a09cd6b3394 100644
--- a/sklearn/tree/_criterion.pxd
+++ b/sklearn/tree/_criterion.pxd
@@ -1,3 +1,4 @@
+# cython: language_level=3
 # Authors: Gilles Louppe
 #          Peter Prettenhofer
 #          Brian Holt
@@ -12,11 +13,11 @@ import numpy as np
 cimport numpy as np
 
-ctypedef np.npy_float32 DTYPE_t          # Type of X
-ctypedef np.npy_float64 DOUBLE_t         # Type of y, sample_weight
-ctypedef np.npy_intp SIZE_t              # Type for indices and counters
-ctypedef np.npy_int32 INT32_t            # Signed 32 bit integer
-ctypedef np.npy_uint32 UINT32_t          # Unsigned 32 bit integer
+from ._tree cimport DTYPE_t              # Type of X
+from ._tree cimport DOUBLE_t             # Type of y, sample_weight
+from ._tree cimport SIZE_t               # Type for indices and counters
+from ._tree cimport INT32_t              # Signed 32 bit integer
+from ._tree cimport UINT32_t             # Unsigned 32 bit integer
 
 cdef class Criterion:
     # The criterion computes the impurity of a node and the reduction of
@@ -24,8 +25,7 @@ cdef class Criterion:
     #  such as the mean in regression and class probabilities in classification.
 
     # Internal structures
-    cdef DOUBLE_t* y                     # Values of y
-    cdef SIZE_t y_stride                 # Stride in y (since n_outputs >= 1)
+    cdef const DOUBLE_t[:, ::1] y        # Values of y
    cdef DOUBLE_t* sample_weight         # Sample weights
 
     cdef SIZE_t* samples                 # Sample indices in X, y
@@ -53,7 +53,7 @@ cdef class Criterion:
     #    statistics correspond to samples[start:pos] and samples[pos:end].
 
     # Methods
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
                   SIZE_t end) nogil except -1
     cdef int reset(self) nogil except -1
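Annotation, not part of the patch: the five `ctypedef` aliases removed above are now shared via `cimport` from `_tree` instead of being duplicated per file. As a quick reference, a hedged Python sketch of the NumPy dtypes those aliases correspond to (the Python-level names below are illustrative, not identifiers introduced by this diff):

```python
import numpy as np

# NumPy equivalents of the Cython aliases cimported from `_tree`
# (same widths as the removed ctypedefs):
DTYPE = np.float32   # DTYPE_t  - dtype of X
DOUBLE = np.float64  # DOUBLE_t - dtype of y and sample_weight
SIZE = np.intp       # SIZE_t   - indices and counters
INT32 = np.int32     # INT32_t  - signed 32 bit integer
UINT32 = np.uint32   # UINT32_t - unsigned 32 bit integer

# Sanity check: SIZE_t must be at least as wide as a 32-bit index.
assert np.dtype(SIZE).itemsize >= np.dtype(INT32).itemsize
```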
diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx
index a2b362334de54..e6c3d628baf53 100644
--- a/sklearn/tree/_criterion.pyx
+++ b/sklearn/tree/_criterion.pyx
@@ -51,7 +51,7 @@ cdef class Criterion:
     def __setstate__(self, d):
         pass
 
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
                   SIZE_t end) nogil except -1:
         """Placeholder for a method which will initialize the criterion.
@@ -63,9 +63,6 @@
         ----------
         y : array-like, dtype=DOUBLE_t
             y is a buffer that can store values for n_outputs target variables
-        y_stride : SIZE_t
-            y_stride is used to index the kth output value as follows:
-            y[i, k] = y[i * y_stride + k]
         sample_weight : array-like, dtype=DOUBLE_t
             The weight of each sample
         weighted_n_samples : DOUBLE_t
@@ -224,8 +221,6 @@ cdef class ClassificationCriterion(Criterion):
             The number of unique classes in each target
         """
 
-        self.y = NULL
-        self.y_stride = 0
         self.sample_weight = NULL
 
         self.samples = NULL
@@ -281,7 +276,7 @@ cdef class ClassificationCriterion(Criterion):
                                sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)),
                 self.__getstate__())
 
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride,
+    cdef int init(self, const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight, double weighted_n_samples,
                   SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
@@ -294,9 +289,6 @@
         ----------
         y : array-like, dtype=DOUBLE_t
             The target stored as a buffer for memory efficiency
-        y_stride : SIZE_t
-            The stride between elements in the buffer, important if there
-            are multiple targets (multi-output)
         sample_weight : array-like, dtype=DTYPE_t
             The weight of each sample
         weighted_n_samples : SIZE_t
@@ -310,7 +302,6 @@ cdef class ClassificationCriterion(Criterion):
         """
 
         self.y = y
-        self.y_stride = y_stride
         self.sample_weight = sample_weight
         self.samples = samples
         self.start = start
@@ -343,7 +334,7 @@ cdef class ClassificationCriterion(Criterion):
 
             # Count weighted class frequency for each target
             for k in range(self.n_outputs):
-                c = <SIZE_t> y[i * y_stride + k]
+                c = <SIZE_t> self.y[i, k]
                 sum_total[k * self.sum_stride + c] += w
 
             self.weighted_n_node_samples += w
@@ -418,7 +409,6 @@
             The new ending position for which to move samples from the
             right child to the left child.
""" - cdef DOUBLE_t* y = self.y cdef SIZE_t pos = self.pos cdef SIZE_t end = self.end @@ -453,8 +443,7 @@ cdef class ClassificationCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - label_index = (k * self.sum_stride + - y[i * self.y_stride + k]) + label_index = k * self.sum_stride + self.y[i, k] sum_left[label_index] += w self.weighted_n_left += w @@ -469,8 +458,7 @@ cdef class ClassificationCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - label_index = (k * self.sum_stride + - y[i * self.y_stride + k]) + label_index = k * self.sum_stride + self.y[i, k] sum_left[label_index] -= w self.weighted_n_left -= w @@ -714,8 +702,6 @@ cdef class RegressionCriterion(Criterion): """ # Default values - self.y = NULL - self.y_stride = 0 self.sample_weight = NULL self.samples = NULL @@ -751,14 +737,13 @@ cdef class RegressionCriterion(Criterion): def __reduce__(self): return (type(self), (self.n_outputs, self.n_samples), self.__getstate__()) - cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" # Initialize fields self.y = y - self.y_stride = y_stride self.sample_weight = sample_weight self.samples = samples self.start = start @@ -784,7 +769,7 @@ cdef class RegressionCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * y_stride + k] + y_ik = self.y[i, k] w_y_ik = w * y_ik self.sum_total[k] += w_y_ik self.sq_sum_total += w_y_ik * y_ik @@ -827,14 +812,12 @@ cdef class RegressionCriterion(Criterion): cdef double* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples - cdef DOUBLE_t* y = self.y cdef SIZE_t pos = self.pos cdef SIZE_t end = self.end cdef SIZE_t i cdef SIZE_t p cdef SIZE_t k cdef DOUBLE_t w = 1.0 - cdef DOUBLE_t y_ik # Update statistics up to new_pos # @@ -852,8 +835,7 @@ cdef class RegressionCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * self.y_stride + k] - sum_left[k] += w * y_ik + sum_left[k] += w * self.y[i, k] self.weighted_n_left += w else: @@ -866,8 +848,7 @@ cdef class RegressionCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * self.y_stride + k] - sum_left[k] -= w * y_ik + sum_left[k] -= w * self.y[i, k] self.weighted_n_left -= w @@ -947,8 +928,6 @@ cdef class MSE(RegressionCriterion): left child (samples[start:pos]) and the impurity the right child (samples[pos:end]).""" - - cdef DOUBLE_t* y = self.y cdef DOUBLE_t* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples cdef SIZE_t pos = self.pos @@ -956,6 +935,7 @@ cdef class MSE(RegressionCriterion): cdef double* sum_left = self.sum_left cdef double* sum_right = self.sum_right + cdef DOUBLE_t y_ik cdef double sq_sum_left = 0.0 cdef double sq_sum_right @@ -964,7 +944,6 @@ cdef class MSE(RegressionCriterion): cdef SIZE_t p cdef SIZE_t k cdef DOUBLE_t w = 1.0 - cdef DOUBLE_t y_ik for p in range(start, pos): i = samples[p] @@ -973,7 +952,7 @@ cdef class MSE(RegressionCriterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * self.y_stride + k] + y_ik = self.y[i, k] sq_sum_left += w * y_ik * y_ik sq_sum_right = self.sq_sum_total - sq_sum_left @@ -1014,8 +993,6 @@ cdef class MAE(RegressionCriterion): """ # Default values 
-        self.y = NULL
-        self.y_stride = 0
         self.sample_weight = NULL
 
         self.samples = NULL
@@ -1044,19 +1021,17 @@
             self.left_child[k] = WeightedMedianCalculator(n_samples)
             self.right_child[k] = WeightedMedianCalculator(n_samples)
 
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
                   SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
            children samples[start:start] and samples[start:end]."""
 
         cdef SIZE_t i, p, k
-        cdef DOUBLE_t y_ik
         cdef DOUBLE_t w = 1.0
 
         # Initialize fields
         self.y = y
-        self.y_stride = y_stride
         self.sample_weight = sample_weight
         self.samples = samples
         self.start = start
@@ -1082,12 +1057,10 @@
                 w = sample_weight[i]
 
             for k in range(self.n_outputs):
-                y_ik = y[i * y_stride + k]
-
                 # push method ends up calling safe_realloc, hence `except -1`
                 # push all values to the right side,
                 # since pos = start initially anyway
-                (<WeightedMedianCalculator> right_child[k]).push(y_ik, w)
+                (<WeightedMedianCalculator> right_child[k]).push(self.y[i, k], w)
 
             self.weighted_n_node_samples += w
         # calculate the node medians
@@ -1172,12 +1145,10 @@
        cdef void** left_child = <void**> self.left_child.data
        cdef void** right_child = <void**> self.right_child.data
 
-        cdef DOUBLE_t* y = self.y
         cdef SIZE_t pos = self.pos
         cdef SIZE_t end = self.end
         cdef SIZE_t i, p, k
         cdef DOUBLE_t w = 1.0
-        cdef DOUBLE_t y_ik
 
         # Update statistics up to new_pos
         #
@@ -1193,11 +1164,10 @@
                     w = sample_weight[i]
 
                 for k in range(self.n_outputs):
-                    y_ik = y[i * self.y_stride + k]
                     # remove y_ik and its weight w from right and add to left
-                    (<WeightedMedianCalculator> right_child[k]).remove(y_ik, w)
+                    (<WeightedMedianCalculator> right_child[k]).remove(self.y[i, k], w)
                     # push method ends up calling safe_realloc, hence except -1
-                    (<WeightedMedianCalculator> left_child[k]).push(y_ik, w)
+                    (<WeightedMedianCalculator> left_child[k]).push(self.y[i, k], w)
 
                 self.weighted_n_left += w
         else:
@@ -1210,10 +1180,9 @@
                     w = sample_weight[i]
 
                 for k in range(self.n_outputs):
-                    y_ik = y[i * self.y_stride + k]
                     # remove y_ik and its weight w from left and add to right
-                    (<WeightedMedianCalculator> left_child[k]).remove(y_ik, w)
-                    (<WeightedMedianCalculator> right_child[k]).push(y_ik, w)
+                    (<WeightedMedianCalculator> left_child[k]).remove(self.y[i, k], w)
+                    (<WeightedMedianCalculator> right_child[k]).push(self.y[i, k], w)
 
                 self.weighted_n_left -= w
@@ -1233,11 +1202,9 @@
         """Evaluate the impurity of the current node, i.e. the impurity of
            samples[start:end]"""
 
-        cdef DOUBLE_t* y = self.y
         cdef DOUBLE_t* sample_weight = self.sample_weight
         cdef SIZE_t* samples = self.samples
         cdef SIZE_t i, p, k
-        cdef DOUBLE_t y_ik
         cdef DOUBLE_t w = 1.0
         cdef DOUBLE_t impurity = 0.0
 
@@ -1245,12 +1212,10 @@
             for p in range(self.start, self.end):
                 i = samples[p]
 
-                y_ik = y[i * self.y_stride + k]
-
                 if sample_weight != NULL:
                     w = sample_weight[i]
 
-                impurity += fabs(y_ik - self.node_medians[k]) * w
+                impurity += fabs(self.y[i, k] - self.node_medians[k]) * w
 
         return impurity / (self.weighted_n_node_samples * self.n_outputs)
 
@@ -1261,7 +1226,6 @@
            (samples[pos:end]).
         """
-        cdef DOUBLE_t* y = self.y
         cdef DOUBLE_t* sample_weight = self.sample_weight
         cdef SIZE_t* samples = self.samples
 
@@ -1270,7 +1234,6 @@
         cdef SIZE_t end = self.end
 
         cdef SIZE_t i, p, k
-        cdef DOUBLE_t y_ik
         cdef DOUBLE_t median
         cdef DOUBLE_t w = 1.0
         cdef DOUBLE_t impurity_left = 0.0
@@ -1284,12 +1247,10 @@
             for p in range(start, pos):
                 i = samples[p]
 
-                y_ik = y[i * self.y_stride + k]
-
                 if sample_weight != NULL:
                     w = sample_weight[i]
 
-                impurity_left += fabs(y_ik - median) * w
+                impurity_left += fabs(self.y[i, k] - median) * w
         p_impurity_left[0] = impurity_left / (self.weighted_n_left *
                                               self.n_outputs)
 
@@ -1298,12 +1259,10 @@
             for p in range(pos, end):
                 i = samples[p]
 
-                y_ik = y[i * self.y_stride + k]
-
                 if sample_weight != NULL:
                     w = sample_weight[i]
 
-                impurity_right += fabs(y_ik - median) * w
+                impurity_right += fabs(self.y[i, k] - median) * w
 
         p_impurity_right[0] = impurity_right / (self.weighted_n_right *
                                                 self.n_outputs)
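Annotation, not part of the patch: every change in `_criterion.pyx` above is the same mechanical rewrite, replacing manual stride arithmetic on a raw `DOUBLE_t*` buffer (`y[i * y_stride + k]`) with 2-D indexing on a typed memoryview (`self.y[i, k]`). A minimal NumPy sketch of why the two are equivalent for the C-contiguous `(n_samples, n_outputs)` target array the tree code assumes (variable names here are illustrative):

```python
import numpy as np

# A C-contiguous (n_samples, n_outputs) target array, as the new
# `const DOUBLE_t[:, ::1]` type requires.
y = np.arange(12, dtype=np.float64).reshape(4, 3)

y_stride = y.strides[0] // y.itemsize  # elements per row; equals n_outputs here
flat = y.ravel()                       # the flat buffer the old pointer code walked

for i in range(y.shape[0]):
    for k in range(y.shape[1]):
        # old pointer-style access vs. new 2-D memoryview-style access
        assert flat[i * y_stride + k] == y[i, k]
```

The memoryview version drops the `y_stride` bookkeeping entirely and lets Cython generate the equivalent pointer arithmetic itself.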
""" - cdef DOUBLE_t* y = self.y cdef DOUBLE_t* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples @@ -1270,7 +1234,6 @@ cdef class MAE(RegressionCriterion): cdef SIZE_t end = self.end cdef SIZE_t i, p, k - cdef DOUBLE_t y_ik cdef DOUBLE_t median cdef DOUBLE_t w = 1.0 cdef DOUBLE_t impurity_left = 0.0 @@ -1284,12 +1247,10 @@ cdef class MAE(RegressionCriterion): for p in range(start, pos): i = samples[p] - y_ik = y[i * self.y_stride + k] - if sample_weight != NULL: w = sample_weight[i] - impurity_left += fabs(y_ik - median) * w + impurity_left += fabs(self.y[i, k] - median) * w p_impurity_left[0] = impurity_left / (self.weighted_n_left * self.n_outputs) @@ -1298,12 +1259,10 @@ cdef class MAE(RegressionCriterion): for p in range(pos, end): i = samples[p] - y_ik = y[i * self.y_stride + k] - if sample_weight != NULL: w = sample_weight[i] - impurity_right += fabs(y_ik - median) * w + impurity_right += fabs(self.y[i, k] - median) * w p_impurity_right[0] = impurity_right / (self.weighted_n_right * self.n_outputs) diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index 4d5c5ae46bceb..5ca7303659a68 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -1,3 +1,5 @@ +# cython: language_level=3 + # Authors: Gilles Louppe # Peter Prettenhofer # Brian Holt @@ -14,11 +16,11 @@ cimport numpy as np from ._criterion cimport Criterion -ctypedef np.npy_float32 DTYPE_t # Type of X -ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight -ctypedef np.npy_intp SIZE_t # Type for indices and counters -ctypedef np.npy_int32 INT32_t # Signed 32 bit integer -ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer +from ._tree cimport DTYPE_t # Type of X +from ._tree cimport DOUBLE_t # Type of y, sample_weight +from ._tree cimport SIZE_t # Type for indices and counters +from ._tree cimport INT32_t # Signed 32 bit integer +from ._tree cimport UINT32_t # Unsigned 32 bit integer cdef struct SplitRecord: # Data to track sample split @@ -60,8 +62,7 @@ cdef class Splitter: cdef bint presort # Whether to use presorting, only # allowed on dense data - cdef DOUBLE_t* y - cdef SIZE_t y_stride + cdef const DOUBLE_t[:, ::1] y cdef DOUBLE_t* sample_weight # The samples vector `samples` is maintained by the Splitter object such @@ -81,7 +82,7 @@ cdef class Splitter: # This allows optimization with depth-based tree building. # Methods - cdef int init(self, object X, np.ndarray y, + cdef int init(self, object X, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight, np.ndarray X_idx_sorted=*) except -1 diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 3f5a176d9171a..fab9aabb03e28 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -92,8 +92,6 @@ cdef class Splitter: self.n_features = 0 self.feature_values = NULL - self.y = NULL - self.y_stride = 0 self.sample_weight = NULL self.max_features = max_features @@ -118,7 +116,7 @@ cdef class Splitter: cdef int init(self, object X, - np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, + DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight, np.ndarray X_idx_sorted=None) except -1: """Initialize the splitter. 
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 3f5a176d9171a..fab9aabb03e28 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -92,8 +92,6 @@ cdef class Splitter:
         self.n_features = 0
         self.feature_values = NULL
 
-        self.y = NULL
-        self.y_stride = 0
         self.sample_weight = NULL
 
         self.max_features = max_features
@@ -118,7 +116,7 @@
 
     cdef int init(self,
                   object X,
-                  np.ndarray[DOUBLE_t, ndim=2, mode="c"] y,
+                  const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight,
                   np.ndarray X_idx_sorted=None) except -1:
         """Initialize the splitter.
@@ -179,8 +177,7 @@ cdef class Splitter:
         safe_realloc(&self.feature_values, n_samples)
         safe_realloc(&self.constant_features, n_features)
 
-        self.y = <DOUBLE_t*> y.data
-        self.y_stride = <SIZE_t> y.strides[0] / <SIZE_t> y.itemsize
+        self.y = y
 
         self.sample_weight = sample_weight
         return 0
@@ -206,7 +203,6 @@ cdef class Splitter:
         self.end = end
 
         self.criterion.init(self.y,
-                            self.y_stride,
                             self.sample_weight,
                             self.weighted_n_samples,
                             self.samples,
@@ -240,9 +236,7 @@
 
 cdef class BaseDenseSplitter(Splitter):
-    cdef DTYPE_t* X
-    cdef SIZE_t X_sample_stride
-    cdef SIZE_t X_feature_stride
+    cdef DTYPE_t[:, :] X
 
     cdef np.ndarray X_idx_sorted
     cdef INT32_t* X_idx_sorted_ptr
@@ -254,9 +248,6 @@
                   SIZE_t min_samples_leaf, double min_weight_leaf,
                   object random_state, bint presort):
 
-        self.X = NULL
-        self.X_sample_stride = 0
-        self.X_feature_stride = 0
         self.X_idx_sorted_ptr = NULL
         self.X_idx_sorted_stride = 0
         self.sample_mask = NULL
@@ -269,7 +260,7 @@
 
     cdef int init(self,
                   object X,
-                  np.ndarray[DOUBLE_t, ndim=2, mode="c"] y,
+                  const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight,
                   np.ndarray X_idx_sorted=None) except -1:
         """Initialize the splitter
@@ -281,12 +272,7 @@
         # Call parent init
         Splitter.init(self, X, y, sample_weight)
 
-        # Initialize X
-        cdef np.ndarray X_ndarray = X
-
-        self.X = <DTYPE_t*> X_ndarray.data
-        self.X_sample_stride = <SIZE_t> X.strides[0] / <SIZE_t> X.itemsize
-        self.X_feature_stride = <SIZE_t> X.strides[1] / <SIZE_t> X.itemsize
+        self.X = X
 
         if self.presort == 1:
             self.X_idx_sorted = X_idx_sorted
@@ -327,10 +313,7 @@
         cdef SIZE_t* constant_features = self.constant_features
         cdef SIZE_t n_features = self.n_features
 
-        cdef DTYPE_t* X = self.X
         cdef DTYPE_t* Xf = self.feature_values
-        cdef SIZE_t X_sample_stride = self.X_sample_stride
-        cdef SIZE_t X_feature_stride = self.X_feature_stride
         cdef SIZE_t max_features = self.max_features
         cdef SIZE_t min_samples_leaf = self.min_samples_leaf
         cdef double min_weight_leaf = self.min_weight_leaf
@@ -414,7 +397,6 @@
                     f_j += n_found_constants
                     # f_j in the interval [n_total_constants, f_i[
                     current.feature = features[f_j]
-                    feature_offset = self.X_feature_stride * current.feature
 
                     # Sort samples along that feature; either by utilizing
                     # presorting, or by copying the values into an array and
@@ -428,11 +410,11 @@
                             j = X_idx_sorted[i + feature_idx_offset]
                             if sample_mask[j] == 1:
                                 samples[p] = j
-                                Xf[p] = X[self.X_sample_stride * j + feature_offset]
+                                Xf[p] = self.X[j, current.feature]
                                 p += 1
                     else:
                         for i in range(start, end):
-                            Xf[i] = X[self.X_sample_stride * samples[i] + feature_offset]
+                            Xf[i] = self.X[samples[i], current.feature]
 
                         sort(Xf + start, samples + start, end - start)
 
@@ -493,12 +475,11 @@
         # Reorganize into samples[start:best.pos] + samples[best.pos:end]
         if best.pos < end:
-            feature_offset = X_feature_stride * best.feature
             partition_end = end
             p = start
 
             while p < partition_end:
-                if X[X_sample_stride * samples[p] + feature_offset] <= best.threshold:
+                if self.X[samples[p], best.feature] <= best.threshold:
                     p += 1
 
                 else:
@@ -675,10 +656,7 @@
         cdef SIZE_t* constant_features = self.constant_features
         cdef SIZE_t n_features = self.n_features
 
-        cdef DTYPE_t* X = self.X
         cdef DTYPE_t* Xf = self.feature_values
-        cdef SIZE_t X_sample_stride = self.X_sample_stride
-        cdef SIZE_t X_feature_stride = self.X_feature_stride
         cdef SIZE_t max_features = self.max_features
         cdef SIZE_t min_samples_leaf = self.min_samples_leaf
         cdef double min_weight_leaf = self.min_weight_leaf
@@ -753,15 +731,14 @@
                 # f_j in the interval [n_total_constants, f_i[
                 current.feature = features[f_j]
 
-                feature_stride = X_feature_stride * current.feature
-
                 # Find min, max
-                min_feature_value = X[X_sample_stride * samples[start] + feature_stride]
+                min_feature_value = self.X[samples[start], current.feature]
                 max_feature_value = min_feature_value
                 Xf[start] = min_feature_value
 
                 for p in range(start + 1, end):
-                    current_feature_value = X[X_sample_stride * samples[p] + feature_stride]
+                    current_feature_value = self.X[samples[p], current.feature]
                     Xf[p] = current_feature_value
 
                     if current_feature_value < min_feature_value:
@@ -828,14 +805,13 @@
                     best = current  # copy
 
         # Reorganize into samples[start:best.pos] + samples[best.pos:end]
-        feature_stride = X_feature_stride * best.feature
         if best.pos < end:
             if current.feature != best.feature:
                 partition_end = end
                 p = start
 
                 while p < partition_end:
-                    if X[X_sample_stride * samples[p] + feature_stride] <= best.threshold:
+                    if self.X[samples[p], best.feature] <= best.threshold:
                         p += 1
 
                     else:
@@ -900,7 +876,7 @@ cdef class BaseSparseSplitter(Splitter):
 
     cdef int init(self,
                   object X,
-                  np.ndarray[DOUBLE_t, ndim=2, mode="c"] y,
+                  const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight,
                   np.ndarray X_idx_sorted=None) except -1:
         """Initialize the splitter
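Annotation, not part of the patch: with `Splitter.init` now typed as `const DOUBLE_t[:, ::1] y`, callers must hand over a C-contiguous, 2-D float64 array, and thanks to `const` a read-only buffer is also accepted. A hedged sketch of the coercion a caller would perform (the helper name is hypothetical, not part of this diff):

```python
import numpy as np

def as_tree_targets(y):
    """Coerce y to the layout a `const DOUBLE_t[:, ::1]` argument demands."""
    y = np.ascontiguousarray(y, dtype=np.float64)  # DOUBLE_t, C-contiguous
    if y.ndim == 1:
        y = y.reshape(-1, 1)                       # always 2-D: n_outputs >= 1
    return y

y = as_tree_targets([3.0, 1.0, 2.0])
assert y.flags["C_CONTIGUOUS"] and y.dtype == np.float64 and y.ndim == 2

# `const` means even a write-protected array would be an acceptable input,
# which a non-const memoryview would reject:
y.setflags(write=False)
```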
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index ed259c98ac850..c7aee769ae213 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -794,10 +794,7 @@ cdef class Tree:
             raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)
 
         # Extract input
-        cdef np.ndarray X_ndarray = X
-        cdef DTYPE_t* X_ptr = <DTYPE_t*> X_ndarray.data
-        cdef SIZE_t X_sample_stride = <SIZE_t> X.strides[0] / <SIZE_t> X.itemsize
-        cdef SIZE_t X_fx_stride = <SIZE_t> X.strides[1] / <SIZE_t> X.itemsize
+        cdef DTYPE_t[:, :] X_ndarray = X
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
@@ -814,8 +811,7 @@ cdef class Tree:
                 # While node not a leaf
                 while node.left_child != _TREE_LEAF:
                     # ... and node.right_child != _TREE_LEAF:
-                    if X_ptr[X_sample_stride * i +
-                             X_fx_stride * node.feature] <= node.threshold:
+                    if X_ndarray[i, node.feature] <= node.threshold:
                         node = &self.nodes[node.left_child]
                     else:
                         node = &self.nodes[node.right_child]
@@ -918,10 +914,7 @@ cdef class Tree:
             raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)
 
         # Extract input
-        cdef np.ndarray X_ndarray = X
-        cdef DTYPE_t* X_ptr = <DTYPE_t*> X_ndarray.data
-        cdef SIZE_t X_sample_stride = <SIZE_t> X.strides[0] / <SIZE_t> X.itemsize
-        cdef SIZE_t X_fx_stride = <SIZE_t> X.strides[1] / <SIZE_t> X.itemsize
+        cdef DTYPE_t[:, :] X_ndarray = X
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
@@ -948,8 +941,7 @@ cdef class Tree:
                     indices_ptr[indptr_ptr[i + 1]] = <SIZE_t>(node - self.nodes)
                     indptr_ptr[i + 1] += 1
 
-                    if X_ptr[X_sample_stride * i +
-                             X_fx_stride * node.feature] <= node.threshold:
+                    if X_ndarray[i, node.feature] <= node.threshold:
                         node = &self.nodes[node.left_child]
                     else:
                         node = &self.nodes[node.right_child]
diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd
index 04806ade180c2..60a4f552a9527 100644
--- a/sklearn/tree/_utils.pxd
+++ b/sklearn/tree/_utils.pxd
@@ -1,3 +1,5 @@
+# cython: language_level=3
+
 # Authors: Gilles Louppe
 #          Peter Prettenhofer
 #          Arnaud Joly
@@ -10,7 +12,7 @@ import numpy as np
 cimport numpy as np
 
-from _tree cimport Node
+from ._tree cimport Node
 from sklearn.neighbors.quad_tree cimport Cell
 
 ctypedef np.npy_float32 DTYPE_t          # Type of X
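Annotation, not part of the patch: after the change, `Tree.predict`/`apply`/`decision_path` walk the tree with plain `X_ndarray[i, node.feature]` lookups instead of `X_ptr[X_sample_stride * i + X_fx_stride * node.feature]`. A pure-Python sketch of that traversal under simplified assumptions (nodes as dicts rather than the real `Node` struct):

```python
import numpy as np

TREE_LEAF = -1  # mirrors _TREE_LEAF in _tree.pyx

def apply_one(nodes, x):
    """Return the id of the leaf that sample `x` falls into."""
    node = nodes[0]
    while node["left"] != TREE_LEAF:  # internal nodes have both children
        if x[node["feature"]] <= node["threshold"]:
            node = nodes[node["left"]]
        else:
            node = nodes[node["right"]]
    return node["id"]

nodes = [
    {"id": 0, "feature": 0, "threshold": 0.5, "left": 1, "right": 2},
    {"id": 1, "feature": 0, "threshold": 0.0, "left": TREE_LEAF, "right": TREE_LEAF},
    {"id": 2, "feature": 0, "threshold": 0.0, "left": TREE_LEAF, "right": TREE_LEAF},
]
X = np.array([[0.2], [0.9]], dtype=np.float32)  # DTYPE_t is float32
assert [apply_one(nodes, X[i]) for i in range(len(X))] == [1, 2]
```

Besides readability, the `DTYPE_t[:, :]` view keeps shape and stride information bundled with the data, whereas the old raw-pointer form silently depended on the caller computing strides correctly.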