diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd
index 1cbd395af8e37..e4a09cd6b3394 100644
--- a/sklearn/tree/_criterion.pxd
+++ b/sklearn/tree/_criterion.pxd
@@ -1,3 +1,4 @@
+# cython: language_level=3
 # Authors: Gilles Louppe
 #          Peter Prettenhofer
 #          Brian Holt
@@ -12,11 +13,11 @@ import numpy as np
 cimport numpy as np
 
-ctypedef np.npy_float32 DTYPE_t          # Type of X
-ctypedef np.npy_float64 DOUBLE_t         # Type of y, sample_weight
-ctypedef np.npy_intp SIZE_t              # Type for indices and counters
-ctypedef np.npy_int32 INT32_t            # Signed 32 bit integer
-ctypedef np.npy_uint32 UINT32_t          # Unsigned 32 bit integer
+from ._tree cimport DTYPE_t              # Type of X
+from ._tree cimport DOUBLE_t             # Type of y, sample_weight
+from ._tree cimport SIZE_t               # Type for indices and counters
+from ._tree cimport INT32_t              # Signed 32 bit integer
+from ._tree cimport UINT32_t             # Unsigned 32 bit integer
 
 cdef class Criterion:
     # The criterion computes the impurity of a node and the reduction of
@@ -24,8 +25,7 @@ cdef class Criterion:
     #  such as the mean in regression and class probabilities in classification.
 
     # Internal structures
-    cdef DOUBLE_t* y                     # Values of y
-    cdef SIZE_t y_stride                 # Stride in y (since n_outputs >= 1)
+    cdef const DOUBLE_t[:, ::1] y        # Values of y
    cdef DOUBLE_t* sample_weight         # Sample weights
 
     cdef SIZE_t* samples                 # Sample indices in X, y
@@ -53,7 +53,7 @@ cdef class Criterion:
     #    statistics correspond to samples[start:pos] and samples[pos:end].
 
     # Methods
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
                   SIZE_t end) nogil except -1
     cdef int reset(self) nogil except -1
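Annotation, not part of the patch: the five `ctypedef` aliases removed above are now shared via `cimport` from `_tree` instead of being duplicated per file. As a quick reference, a hedged Python sketch of the NumPy dtypes those aliases correspond to (the Python-level names below are illustrative, not identifiers introduced by this diff):

```python
import numpy as np

# NumPy equivalents of the Cython aliases cimported from `_tree`
# (same widths as the removed ctypedefs):
DTYPE = np.float32   # DTYPE_t  - dtype of X
DOUBLE = np.float64  # DOUBLE_t - dtype of y and sample_weight
SIZE = np.intp       # SIZE_t   - indices and counters
INT32 = np.int32     # INT32_t  - signed 32 bit integer
UINT32 = np.uint32   # UINT32_t - unsigned 32 bit integer

# Sanity check: SIZE_t must be at least as wide as a 32-bit index.
assert np.dtype(SIZE).itemsize >= np.dtype(INT32).itemsize
```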
diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx
index a2b362334de54..e6c3d628baf53 100644
--- a/sklearn/tree/_criterion.pyx
+++ b/sklearn/tree/_criterion.pyx
@@ -51,7 +51,7 @@ cdef class Criterion:
     def __setstate__(self, d):
         pass
 
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
                   SIZE_t end) nogil except -1:
         """Placeholder for a method which will initialize the criterion.
@@ -63,9 +63,6 @@
         ----------
         y : array-like, dtype=DOUBLE_t
             y is a buffer that can store values for n_outputs target variables
-        y_stride : SIZE_t
-            y_stride is used to index the kth output value as follows:
-            y[i, k] = y[i * y_stride + k]
         sample_weight : array-like, dtype=DOUBLE_t
             The weight of each sample
         weighted_n_samples : DOUBLE_t
@@ -224,8 +221,6 @@ cdef class ClassificationCriterion(Criterion):
             The number of unique classes in each target
         """
 
-        self.y = NULL
-        self.y_stride = 0
         self.sample_weight = NULL
 
         self.samples = NULL
@@ -281,7 +276,7 @@ cdef class ClassificationCriterion(Criterion):
                                sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)),
                 self.__getstate__())
 
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride,
+    cdef int init(self, const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight, double weighted_n_samples,
                   SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
@@ -294,9 +289,6 @@
         ----------
         y : array-like, dtype=DOUBLE_t
             The target stored as a buffer for memory efficiency
-        y_stride : SIZE_t
-            The stride between elements in the buffer, important if there
-            are multiple targets (multi-output)
         sample_weight : array-like, dtype=DTYPE_t
             The weight of each sample
         weighted_n_samples : SIZE_t
@@ -310,7 +302,6 @@ cdef class ClassificationCriterion(Criterion):
         """
 
         self.y = y
-        self.y_stride = y_stride
         self.sample_weight = sample_weight
         self.samples = samples
         self.start = start
@@ -343,7 +334,7 @@ cdef class ClassificationCriterion(Criterion):
 
             # Count weighted class frequency for each target
             for k in range(self.n_outputs):
-                c = <SIZE_t> y[i * y_stride + k]
+                c = <SIZE_t> self.y[i, k]
                 sum_total[k * self.sum_stride + c] += w
 
             self.weighted_n_node_samples += w
@@ -418,7 +409,6 @@
             The new ending position for which to move samples from the
             right child to the left child.
""" - cdef DOUBLE_t* y = self.y cdef SIZE_t pos = self.pos cdef SIZE_t end = self.end @@ -453,8 +443,7 @@ cdef class ClassificationCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - label_index = (k * self.sum_stride + - y[i * self.y_stride + k]) + label_index = k * self.sum_stride + self.y[i, k] sum_left[label_index] += w self.weighted_n_left += w @@ -469,8 +458,7 @@ cdef class ClassificationCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - label_index = (k * self.sum_stride + - y[i * self.y_stride + k]) + label_index = k * self.sum_stride + self.y[i, k] sum_left[label_index] -= w self.weighted_n_left -= w @@ -714,8 +702,6 @@ cdef class RegressionCriterion(Criterion): """ # Default values - self.y = NULL - self.y_stride = 0 self.sample_weight = NULL self.samples = NULL @@ -751,14 +737,13 @@ cdef class RegressionCriterion(Criterion): def __reduce__(self): return (type(self), (self.n_outputs, self.n_samples), self.__getstate__()) - cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" # Initialize fields self.y = y - self.y_stride = y_stride self.sample_weight = sample_weight self.samples = samples self.start = start @@ -784,7 +769,7 @@ cdef class RegressionCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * y_stride + k] + y_ik = self.y[i, k] w_y_ik = w * y_ik self.sum_total[k] += w_y_ik self.sq_sum_total += w_y_ik * y_ik @@ -827,14 +812,12 @@ cdef class RegressionCriterion(Criterion): cdef double* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples - cdef DOUBLE_t* y = self.y cdef SIZE_t pos = self.pos cdef SIZE_t end = self.end cdef SIZE_t i cdef SIZE_t p cdef SIZE_t k cdef DOUBLE_t w = 1.0 - cdef DOUBLE_t y_ik # Update statistics up to new_pos # @@ -852,8 +835,7 @@ cdef class RegressionCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * self.y_stride + k] - sum_left[k] += w * y_ik + sum_left[k] += w * self.y[i, k] self.weighted_n_left += w else: @@ -866,8 +848,7 @@ cdef class RegressionCriterion(Criterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * self.y_stride + k] - sum_left[k] -= w * y_ik + sum_left[k] -= w * self.y[i, k] self.weighted_n_left -= w @@ -947,8 +928,6 @@ cdef class MSE(RegressionCriterion): left child (samples[start:pos]) and the impurity the right child (samples[pos:end]).""" - - cdef DOUBLE_t* y = self.y cdef DOUBLE_t* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples cdef SIZE_t pos = self.pos @@ -956,6 +935,7 @@ cdef class MSE(RegressionCriterion): cdef double* sum_left = self.sum_left cdef double* sum_right = self.sum_right + cdef DOUBLE_t y_ik cdef double sq_sum_left = 0.0 cdef double sq_sum_right @@ -964,7 +944,6 @@ cdef class MSE(RegressionCriterion): cdef SIZE_t p cdef SIZE_t k cdef DOUBLE_t w = 1.0 - cdef DOUBLE_t y_ik for p in range(start, pos): i = samples[p] @@ -973,7 +952,7 @@ cdef class MSE(RegressionCriterion): w = sample_weight[i] for k in range(self.n_outputs): - y_ik = y[i * self.y_stride + k] + y_ik = self.y[i, k] sq_sum_left += w * y_ik * y_ik sq_sum_right = self.sq_sum_total - sq_sum_left @@ -1014,8 +993,6 @@ cdef class MAE(RegressionCriterion): """ # Default values 
-        self.y = NULL
-        self.y_stride = 0
         self.sample_weight = NULL
 
         self.samples = NULL
@@ -1044,19 +1021,17 @@
             self.left_child[k] = WeightedMedianCalculator(n_samples)
             self.right_child[k] = WeightedMedianCalculator(n_samples)
 
-    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
                   SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
            children samples[start:start] and samples[start:end]."""
 
         cdef SIZE_t i, p, k
-        cdef DOUBLE_t y_ik
         cdef DOUBLE_t w = 1.0
 
         # Initialize fields
         self.y = y
-        self.y_stride = y_stride
         self.sample_weight = sample_weight
         self.samples = samples
         self.start = start
@@ -1082,12 +1057,10 @@
                 w = sample_weight[i]
 
             for k in range(self.n_outputs):
-                y_ik = y[i * y_stride + k]
-
                 # push method ends up calling safe_realloc, hence `except -1`
                 # push all values to the right side,
                 # since pos = start initially anyway
-                (<WeightedMedianCalculator> right_child[k]).push(y_ik, w)
+                (<WeightedMedianCalculator> right_child[k]).push(self.y[i, k], w)
 
             self.weighted_n_node_samples += w
         # calculate the node medians
@@ -1172,12 +1145,10 @@
        cdef void** left_child = <void**> self.left_child.data
        cdef void** right_child = <void**> self.right_child.data
 
-        cdef DOUBLE_t* y = self.y
         cdef SIZE_t pos = self.pos
         cdef SIZE_t end = self.end
         cdef SIZE_t i, p, k
         cdef DOUBLE_t w = 1.0
-        cdef DOUBLE_t y_ik
 
         # Update statistics up to new_pos
         #
@@ -1193,11 +1164,10 @@
                     w = sample_weight[i]
 
                 for k in range(self.n_outputs):
-                    y_ik = y[i * self.y_stride + k]
                     # remove y_ik and its weight w from right and add to left
-                    (<WeightedMedianCalculator> right_child[k]).remove(y_ik, w)
+                    (<WeightedMedianCalculator> right_child[k]).remove(self.y[i, k], w)
                     # push method ends up calling safe_realloc, hence except -1
-                    (<WeightedMedianCalculator> left_child[k]).push(y_ik, w)
+                    (<WeightedMedianCalculator> left_child[k]).push(self.y[i, k], w)
 
                 self.weighted_n_left += w
         else:
@@ -1210,10 +1180,9 @@
                     w = sample_weight[i]
 
                 for k in range(self.n_outputs):
-                    y_ik = y[i * self.y_stride + k]
                     # remove y_ik and its weight w from left and add to right
-                    (<WeightedMedianCalculator> left_child[k]).remove(y_ik, w)
-                    (<WeightedMedianCalculator> right_child[k]).push(y_ik, w)
+                    (<WeightedMedianCalculator> left_child[k]).remove(self.y[i, k], w)
+                    (<WeightedMedianCalculator> right_child[k]).push(self.y[i, k], w)
 
                 self.weighted_n_left -= w
@@ -1233,11 +1202,9 @@
         """Evaluate the impurity of the current node, i.e. the impurity of
            samples[start:end]"""
 
-        cdef DOUBLE_t* y = self.y
         cdef DOUBLE_t* sample_weight = self.sample_weight
         cdef SIZE_t* samples = self.samples
         cdef SIZE_t i, p, k
-        cdef DOUBLE_t y_ik
         cdef DOUBLE_t w = 1.0
         cdef DOUBLE_t impurity = 0.0
 
@@ -1245,12 +1212,10 @@
             for p in range(self.start, self.end):
                 i = samples[p]
 
-                y_ik = y[i * self.y_stride + k]
-
                 if sample_weight != NULL:
                     w = sample_weight[i]
 
-                impurity += fabs(y_ik - self.node_medians[k]) * w
+                impurity += fabs(self.y[i, k] - self.node_medians[k]) * w
 
         return impurity / (self.weighted_n_node_samples * self.n_outputs)
 
@@ -1261,7 +1226,6 @@
            (samples[pos:end]).
         """
-        cdef DOUBLE_t* y = self.y
         cdef DOUBLE_t* sample_weight = self.sample_weight
         cdef SIZE_t* samples = self.samples
 
@@ -1270,7 +1234,6 @@
         cdef SIZE_t end = self.end
 
         cdef SIZE_t i, p, k
-        cdef DOUBLE_t y_ik
         cdef DOUBLE_t median
         cdef DOUBLE_t w = 1.0
         cdef DOUBLE_t impurity_left = 0.0
@@ -1284,12 +1247,10 @@
             for p in range(start, pos):
                 i = samples[p]
 
-                y_ik = y[i * self.y_stride + k]
-
                 if sample_weight != NULL:
                     w = sample_weight[i]
 
-                impurity_left += fabs(y_ik - median) * w
+                impurity_left += fabs(self.y[i, k] - median) * w
         p_impurity_left[0] = impurity_left / (self.weighted_n_left *
                                               self.n_outputs)
 
@@ -1298,12 +1259,10 @@
             for p in range(pos, end):
                 i = samples[p]
 
-                y_ik = y[i * self.y_stride + k]
-
                 if sample_weight != NULL:
                     w = sample_weight[i]
 
-                impurity_right += fabs(y_ik - median) * w
+                impurity_right += fabs(self.y[i, k] - median) * w
 
         p_impurity_right[0] = impurity_right / (self.weighted_n_right *
                                                 self.n_outputs)
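Annotation, not part of the patch: every change in `_criterion.pyx` above is the same mechanical rewrite, replacing manual stride arithmetic on a raw `DOUBLE_t*` buffer (`y[i * y_stride + k]`) with 2-D indexing on a typed memoryview (`self.y[i, k]`). A minimal NumPy sketch of why the two are equivalent for the C-contiguous `(n_samples, n_outputs)` target array the tree code assumes (variable names here are illustrative):

```python
import numpy as np

# A C-contiguous (n_samples, n_outputs) target array, as the new
# `const DOUBLE_t[:, ::1]` type requires.
y = np.arange(12, dtype=np.float64).reshape(4, 3)

y_stride = y.strides[0] // y.itemsize  # elements per row; equals n_outputs here
flat = y.ravel()                       # the flat buffer the old pointer code walked

for i in range(y.shape[0]):
    for k in range(y.shape[1]):
        # old pointer-style access vs. new 2-D memoryview-style access
        assert flat[i * y_stride + k] == y[i, k]
```

The memoryview version drops the `y_stride` bookkeeping entirely and lets Cython generate the equivalent pointer arithmetic itself.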
""" - cdef DOUBLE_t* y = self.y cdef DOUBLE_t* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples @@ -1270,7 +1234,6 @@ cdef class MAE(RegressionCriterion): cdef SIZE_t end = self.end cdef SIZE_t i, p, k - cdef DOUBLE_t y_ik cdef DOUBLE_t median cdef DOUBLE_t w = 1.0 cdef DOUBLE_t impurity_left = 0.0 @@ -1284,12 +1247,10 @@ cdef class MAE(RegressionCriterion): for p in range(start, pos): i = samples[p] - y_ik = y[i * self.y_stride + k] - if sample_weight != NULL: w = sample_weight[i] - impurity_left += fabs(y_ik - median) * w + impurity_left += fabs(self.y[i, k] - median) * w p_impurity_left[0] = impurity_left / (self.weighted_n_left * self.n_outputs) @@ -1298,12 +1259,10 @@ cdef class MAE(RegressionCriterion): for p in range(pos, end): i = samples[p] - y_ik = y[i * self.y_stride + k] - if sample_weight != NULL: w = sample_weight[i] - impurity_right += fabs(y_ik - median) * w + impurity_right += fabs(self.y[i, k] - median) * w p_impurity_right[0] = impurity_right / (self.weighted_n_right * self.n_outputs) diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index 4d5c5ae46bceb..5ca7303659a68 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -1,3 +1,5 @@ +# cython: language_level=3 + # Authors: Gilles Louppe # Peter Prettenhofer # Brian Holt @@ -14,11 +16,11 @@ cimport numpy as np from ._criterion cimport Criterion -ctypedef np.npy_float32 DTYPE_t # Type of X -ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight -ctypedef np.npy_intp SIZE_t # Type for indices and counters -ctypedef np.npy_int32 INT32_t # Signed 32 bit integer -ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer +from ._tree cimport DTYPE_t # Type of X +from ._tree cimport DOUBLE_t # Type of y, sample_weight +from ._tree cimport SIZE_t # Type for indices and counters +from ._tree cimport INT32_t # Signed 32 bit integer +from ._tree cimport UINT32_t # Unsigned 32 bit integer cdef struct SplitRecord: # Data to track sample split @@ -60,8 +62,7 @@ cdef class Splitter: cdef bint presort # Whether to use presorting, only # allowed on dense data - cdef DOUBLE_t* y - cdef SIZE_t y_stride + cdef const DOUBLE_t[:, ::1] y cdef DOUBLE_t* sample_weight # The samples vector `samples` is maintained by the Splitter object such @@ -81,7 +82,7 @@ cdef class Splitter: # This allows optimization with depth-based tree building. # Methods - cdef int init(self, object X, np.ndarray y, + cdef int init(self, object X, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight, np.ndarray X_idx_sorted=*) except -1 diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 3f5a176d9171a..fab9aabb03e28 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -92,8 +92,6 @@ cdef class Splitter: self.n_features = 0 self.feature_values = NULL - self.y = NULL - self.y_stride = 0 self.sample_weight = NULL self.max_features = max_features @@ -118,7 +116,7 @@ cdef class Splitter: cdef int init(self, object X, - np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, + DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight, np.ndarray X_idx_sorted=None) except -1: """Initialize the splitter. 
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 3f5a176d9171a..fab9aabb03e28 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -92,8 +92,6 @@ cdef class Splitter:
         self.n_features = 0
         self.feature_values = NULL
 
-        self.y = NULL
-        self.y_stride = 0
         self.sample_weight = NULL
 
         self.max_features = max_features
@@ -118,7 +116,7 @@
 
     cdef int init(self,
                   object X,
-                  np.ndarray[DOUBLE_t, ndim=2, mode="c"] y,
+                  const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight,
                   np.ndarray X_idx_sorted=None) except -1:
         """Initialize the splitter.
@@ -179,8 +177,7 @@ cdef class Splitter:
         safe_realloc(&self.feature_values, n_samples)
         safe_realloc(&self.constant_features, n_features)
 
-        self.y = <DOUBLE_t*> y.data
-        self.y_stride = <SIZE_t> y.strides[0] / <SIZE_t> y.itemsize
+        self.y = y
 
         self.sample_weight = sample_weight
         return 0
@@ -206,7 +203,6 @@ cdef class Splitter:
         self.end = end
 
         self.criterion.init(self.y,
-                            self.y_stride,
                             self.sample_weight,
                             self.weighted_n_samples,
                             self.samples,
@@ -240,9 +236,7 @@
 
 cdef class BaseDenseSplitter(Splitter):
-    cdef DTYPE_t* X
-    cdef SIZE_t X_sample_stride
-    cdef SIZE_t X_feature_stride
+    cdef DTYPE_t[:, :] X
 
     cdef np.ndarray X_idx_sorted
     cdef INT32_t* X_idx_sorted_ptr
@@ -254,9 +248,6 @@
                   SIZE_t min_samples_leaf, double min_weight_leaf,
                   object random_state, bint presort):
 
-        self.X = NULL
-        self.X_sample_stride = 0
-        self.X_feature_stride = 0
         self.X_idx_sorted_ptr = NULL
         self.X_idx_sorted_stride = 0
         self.sample_mask = NULL
@@ -269,7 +260,7 @@
 
     cdef int init(self,
                   object X,
-                  np.ndarray[DOUBLE_t, ndim=2, mode="c"] y,
+                  const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight,
                   np.ndarray X_idx_sorted=None) except -1:
         """Initialize the splitter
@@ -281,12 +272,7 @@
         # Call parent init
         Splitter.init(self, X, y, sample_weight)
 
-        # Initialize X
-        cdef np.ndarray X_ndarray = X
-
-        self.X = <DTYPE_t*> X_ndarray.data
-        self.X_sample_stride = <SIZE_t> X.strides[0] / <SIZE_t> X.itemsize
-        self.X_feature_stride = <SIZE_t> X.strides[1] / <SIZE_t> X.itemsize
+        self.X = X
 
         if self.presort == 1:
             self.X_idx_sorted = X_idx_sorted
@@ -327,10 +313,7 @@
         cdef SIZE_t* constant_features = self.constant_features
         cdef SIZE_t n_features = self.n_features
 
-        cdef DTYPE_t* X = self.X
         cdef DTYPE_t* Xf = self.feature_values
-        cdef SIZE_t X_sample_stride = self.X_sample_stride
-        cdef SIZE_t X_feature_stride = self.X_feature_stride
         cdef SIZE_t max_features = self.max_features
         cdef SIZE_t min_samples_leaf = self.min_samples_leaf
         cdef double min_weight_leaf = self.min_weight_leaf
@@ -414,7 +397,6 @@
                     f_j += n_found_constants
                     # f_j in the interval [n_total_constants, f_i[
                     current.feature = features[f_j]
-                    feature_offset = self.X_feature_stride * current.feature
 
                     # Sort samples along that feature; either by utilizing
                     # presorting, or by copying the values into an array and
@@ -428,11 +410,11 @@
                             j = X_idx_sorted[i + feature_idx_offset]
                             if sample_mask[j] == 1:
                                 samples[p] = j
-                                Xf[p] = X[self.X_sample_stride * j + feature_offset]
+                                Xf[p] = self.X[j, current.feature]
                                 p += 1
                     else:
                         for i in range(start, end):
-                            Xf[i] = X[self.X_sample_stride * samples[i] + feature_offset]
+                            Xf[i] = self.X[samples[i], current.feature]
 
                         sort(Xf + start, samples + start, end - start)
 
@@ -493,12 +475,11 @@
         # Reorganize into samples[start:best.pos] + samples[best.pos:end]
         if best.pos < end:
-            feature_offset = X_feature_stride * best.feature
             partition_end = end
             p = start
 
             while p < partition_end:
-                if X[X_sample_stride * samples[p] + feature_offset] <= best.threshold:
+                if self.X[samples[p], best.feature] <= best.threshold:
                     p += 1
 
                 else:
@@ -675,10 +656,7 @@
         cdef SIZE_t* constant_features = self.constant_features
         cdef SIZE_t n_features = self.n_features
 
-        cdef DTYPE_t* X = self.X
         cdef DTYPE_t* Xf = self.feature_values
-        cdef SIZE_t X_sample_stride = self.X_sample_stride
-        cdef SIZE_t X_feature_stride = self.X_feature_stride
         cdef SIZE_t max_features = self.max_features
         cdef SIZE_t min_samples_leaf = self.min_samples_leaf
         cdef double min_weight_leaf = self.min_weight_leaf
@@ -753,15 +731,14 @@
                 # f_j in the interval [n_total_constants, f_i[
                 current.feature = features[f_j]
 
-                feature_stride = X_feature_stride * current.feature
-
                 # Find min, max
-                min_feature_value = X[X_sample_stride * samples[start] + feature_stride]
+                min_feature_value = self.X[samples[start], current.feature]
                 max_feature_value = min_feature_value
                 Xf[start] = min_feature_value
 
                 for p in range(start + 1, end):
-                    current_feature_value = X[X_sample_stride * samples[p] + feature_stride]
+                    current_feature_value = self.X[samples[p], current.feature]
                     Xf[p] = current_feature_value
 
                     if current_feature_value < min_feature_value:
@@ -828,14 +805,13 @@
                     best = current  # copy
 
         # Reorganize into samples[start:best.pos] + samples[best.pos:end]
-        feature_stride = X_feature_stride * best.feature
         if best.pos < end:
             if current.feature != best.feature:
                 partition_end = end
                 p = start
 
                 while p < partition_end:
-                    if X[X_sample_stride * samples[p] + feature_stride] <= best.threshold:
+                    if self.X[samples[p], best.feature] <= best.threshold:
                         p += 1
 
                     else:
@@ -900,7 +876,7 @@ cdef class BaseSparseSplitter(Splitter):
 
     cdef int init(self,
                   object X,
-                  np.ndarray[DOUBLE_t, ndim=2, mode="c"] y,
+                  const DOUBLE_t[:, ::1] y,
                   DOUBLE_t* sample_weight,
                   np.ndarray X_idx_sorted=None) except -1:
         """Initialize the splitter
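Annotation, not part of the patch: with `Splitter.init` now typed as `const DOUBLE_t[:, ::1] y`, callers must hand over a C-contiguous, 2-D float64 array, and thanks to `const` a read-only buffer is also accepted. A hedged sketch of the coercion a caller would perform (the helper name is hypothetical, not part of this diff):

```python
import numpy as np

def as_tree_targets(y):
    """Coerce y to the layout a `const DOUBLE_t[:, ::1]` argument demands."""
    y = np.ascontiguousarray(y, dtype=np.float64)  # DOUBLE_t, C-contiguous
    if y.ndim == 1:
        y = y.reshape(-1, 1)                       # always 2-D: n_outputs >= 1
    return y

y = as_tree_targets([3.0, 1.0, 2.0])
assert y.flags["C_CONTIGUOUS"] and y.dtype == np.float64 and y.ndim == 2

# `const` means even a write-protected array would be an acceptable input,
# which a non-const memoryview would reject:
y.setflags(write=False)
```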
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index ed259c98ac850..c7aee769ae213 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -794,10 +794,7 @@ cdef class Tree:
             raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)
 
         # Extract input
-        cdef np.ndarray X_ndarray = X
-        cdef DTYPE_t* X_ptr = <DTYPE_t*> X_ndarray.data
-        cdef SIZE_t X_sample_stride = <SIZE_t> X.strides[0] / <SIZE_t> X.itemsize
-        cdef SIZE_t X_fx_stride = <SIZE_t> X.strides[1] / <SIZE_t> X.itemsize
+        cdef DTYPE_t[:, :] X_ndarray = X
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
@@ -814,8 +811,7 @@ cdef class Tree:
                 # While node not a leaf
                 while node.left_child != _TREE_LEAF:
                     # ... and node.right_child != _TREE_LEAF:
-                    if X_ptr[X_sample_stride * i +
-                             X_fx_stride * node.feature] <= node.threshold:
+                    if X_ndarray[i, node.feature] <= node.threshold:
                         node = &self.nodes[node.left_child]
                     else:
                         node = &self.nodes[node.right_child]
@@ -918,10 +914,7 @@ cdef class Tree:
             raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)
 
         # Extract input
-        cdef np.ndarray X_ndarray = X
-        cdef DTYPE_t* X_ptr = <DTYPE_t*> X_ndarray.data
-        cdef SIZE_t X_sample_stride = <SIZE_t> X.strides[0] / <SIZE_t> X.itemsize
-        cdef SIZE_t X_fx_stride = <SIZE_t> X.strides[1] / <SIZE_t> X.itemsize
+        cdef DTYPE_t[:, :] X_ndarray = X
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
@@ -948,8 +941,7 @@ cdef class Tree:
                     indices_ptr[indptr_ptr[i + 1]] = <SIZE_t>(node - self.nodes)
                     indptr_ptr[i + 1] += 1
 
-                    if X_ptr[X_sample_stride * i +
-                             X_fx_stride * node.feature] <= node.threshold:
+                    if X_ndarray[i, node.feature] <= node.threshold:
                         node = &self.nodes[node.left_child]
                     else:
                         node = &self.nodes[node.right_child]
diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd
index 04806ade180c2..60a4f552a9527 100644
--- a/sklearn/tree/_utils.pxd
+++ b/sklearn/tree/_utils.pxd
@@ -1,3 +1,5 @@
+# cython: language_level=3
+
 # Authors: Gilles Louppe
 #          Peter Prettenhofer
 #          Arnaud Joly
@@ -10,7 +12,7 @@ import numpy as np
 cimport numpy as np
 
-from _tree cimport Node
+from ._tree cimport Node
 from sklearn.neighbors.quad_tree cimport Cell
 
 ctypedef np.npy_float32 DTYPE_t          # Type of X
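Annotation, not part of the patch: after the change, `Tree.predict`/`apply`/`decision_path` walk the tree with plain `X_ndarray[i, node.feature]` lookups instead of `X_ptr[X_sample_stride * i + X_fx_stride * node.feature]`. A pure-Python sketch of that traversal under simplified assumptions (nodes as dicts rather than the real `Node` struct):

```python
import numpy as np

TREE_LEAF = -1  # mirrors _TREE_LEAF in _tree.pyx

def apply_one(nodes, x):
    """Return the id of the leaf that sample `x` falls into."""
    node = nodes[0]
    while node["left"] != TREE_LEAF:  # internal nodes have both children
        if x[node["feature"]] <= node["threshold"]:
            node = nodes[node["left"]]
        else:
            node = nodes[node["right"]]
    return node["id"]

nodes = [
    {"id": 0, "feature": 0, "threshold": 0.5, "left": 1, "right": 2},
    {"id": 1, "feature": 0, "threshold": 0.0, "left": TREE_LEAF, "right": TREE_LEAF},
    {"id": 2, "feature": 0, "threshold": 0.0, "left": TREE_LEAF, "right": TREE_LEAF},
]
X = np.array([[0.2], [0.9]], dtype=np.float32)  # DTYPE_t is float32
assert [apply_one(nodes, X[i]) for i in range(len(X))] == [1, 2]
```

Besides readability, the `DTYPE_t[:, :]` view keeps shape and stride information bundled with the data, whereas the old raw-pointer form silently depended on the caller computing strides correctly.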