Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ Changelog
are always consistent with `scipy.spatial.distance.cdist`.
:pr:`21741` by :user:`Olivier Grisel <ogrisel>`.

:mod:`sklearn.neighbors`
........................

- |Fix| :class:`neighbors.KDTree` and :class:`neighbors.BallTree` now correctly
support read-only buffer attributes. :pr:`21845` by `Thomas Fan`_.

:mod:`sklearn.preprocessing`
............................

Expand Down
2 changes: 1 addition & 1 deletion sklearn/_min_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
JOBLIB_MIN_VERSION = "0.11"
THREADPOOLCTL_MIN_VERSION = "2.0.0"
PYTEST_MIN_VERSION = "5.0.1"
CYTHON_MIN_VERSION = "0.28.5"
CYTHON_MIN_VERSION = "0.29.24"


# 'build' and 'install' is included to have structured metadata for CI.
Expand Down
8 changes: 4 additions & 4 deletions sklearn/neighbors/_ball_tree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ cdef int allocate_data(BinaryTree tree, ITYPE_t n_nodes,
return 0


cdef int init_node(BinaryTree tree, ITYPE_t i_node,
cdef int init_node(BinaryTree tree, NodeData_t[::1] node_data, ITYPE_t i_node,
ITYPE_t idx_start, ITYPE_t idx_end) except -1:
"""Initialize the node for the dataset stored in tree.data"""
cdef ITYPE_t n_features = tree.data.shape[1]
Expand Down Expand Up @@ -94,9 +94,9 @@ cdef int init_node(BinaryTree tree, ITYPE_t i_node,
data + n_features * idx_array[i],
n_features))

tree.node_data[i_node].radius = tree.dist_metric._rdist_to_dist(radius)
tree.node_data[i_node].idx_start = idx_start
tree.node_data[i_node].idx_end = idx_end
node_data[i_node].radius = tree.dist_metric._rdist_to_dist(radius)
node_data[i_node].idx_start = idx_start
node_data[i_node].idx_end = idx_end
return 0


Expand Down
31 changes: 20 additions & 11 deletions sklearn/neighbors/_binary_tree.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -900,9 +900,13 @@ cdef class BinaryTree:
cdef readonly const DTYPE_t[:, ::1] data
cdef readonly const DTYPE_t[::1] sample_weight
cdef public DTYPE_t sum_weight
cdef public ITYPE_t[::1] idx_array
cdef public NodeData_t[::1] node_data
cdef public DTYPE_t[:, :, ::1] node_bounds

# Even though these memoryview attributes are const-qualified,
# they still get modified through their numpy array counterparts.
# For instance, `node_data` gets modified via `node_data_arr`.
cdef public const ITYPE_t[::1] idx_array
cdef public const NodeData_t[::1] node_data
cdef public const DTYPE_t[:, :, ::1] node_bounds

cdef ITYPE_t leaf_size
cdef ITYPE_t n_levels
Expand Down Expand Up @@ -986,7 +990,12 @@ cdef class BinaryTree:

# Allocate tree-specific data
allocate_data(self, self.n_nodes, n_features)
self._recursive_build(0, 0, n_samples)
self._recursive_build(
node_data=self.node_data_arr,
i_node=0,
idx_start=0,
idx_end=n_samples
)

def _update_sample_weight(self, n_samples, sample_weight):
if sample_weight is not None:
Expand Down Expand Up @@ -1133,7 +1142,7 @@ cdef class BinaryTree:
else:
return self.dist_metric.rdist(x1, x2, size)

cdef int _recursive_build(self, ITYPE_t i_node, ITYPE_t idx_start,
cdef int _recursive_build(self, NodeData_t[::1] node_data, ITYPE_t i_node, ITYPE_t idx_start,
ITYPE_t idx_end) except -1:
"""Recursively build the tree.

Expand All @@ -1153,10 +1162,10 @@ cdef class BinaryTree:
cdef DTYPE_t* data = &self.data[0, 0]

# initialize node data
init_node(self, i_node, idx_start, idx_end)
init_node(self, node_data, i_node, idx_start, idx_end)

if 2 * i_node + 1 >= self.n_nodes:
self.node_data[i_node].is_leaf = True
node_data[i_node].is_leaf = True
if idx_end - idx_start > 2 * self.leaf_size:
# this shouldn't happen if our memory allocation is correct
# we'll proactively prevent memory errors, but raise a
Expand All @@ -1171,18 +1180,18 @@ cdef class BinaryTree:
import warnings
warnings.warn("Internal: memory layout is flawed: "
"too many nodes allocated")
self.node_data[i_node].is_leaf = True
node_data[i_node].is_leaf = True

else:
# split node and recursively construct child nodes.
self.node_data[i_node].is_leaf = False
node_data[i_node].is_leaf = False
i_max = find_node_split_dim(data, idx_array,
n_features, n_points)
partition_node_indices(data, idx_array, i_max, n_mid,
n_features, n_points)
self._recursive_build(2 * i_node + 1,
self._recursive_build(node_data,2 * i_node + 1,
idx_start, idx_start + n_mid)
self._recursive_build(2 * i_node + 2,
self._recursive_build(node_data, 2 * i_node + 2,
idx_start + n_mid, idx_end)

def query(self, X, k=1, return_distance=True,
Expand Down
8 changes: 4 additions & 4 deletions sklearn/neighbors/_kd_tree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ cdef int allocate_data(BinaryTree tree, ITYPE_t n_nodes,
return 0


cdef int init_node(BinaryTree tree, ITYPE_t i_node,
cdef int init_node(BinaryTree tree, NodeData_t[::1] node_data, ITYPE_t i_node,
ITYPE_t idx_start, ITYPE_t idx_end) except -1:
"""Initialize the node for the dataset stored in tree.data"""
cdef ITYPE_t n_features = tree.data.shape[1]
Expand Down Expand Up @@ -72,13 +72,13 @@ cdef int init_node(BinaryTree tree, ITYPE_t i_node,
rad += pow(0.5 * abs(upper_bounds[j] - lower_bounds[j]),
tree.dist_metric.p)

tree.node_data[i_node].idx_start = idx_start
tree.node_data[i_node].idx_end = idx_end
node_data[i_node].idx_start = idx_start
node_data[i_node].idx_end = idx_end

# The radius will hold the size of the circumscribed hypersphere measured
# with the specified metric: in querying, this is used as a measure of the
# size of each node when deciding which nodes to split.
tree.node_data[i_node].radius = pow(rad, 1. / tree.dist_metric.p)
node_data[i_node].radius = pow(rad, 1. / tree.dist_metric.p)
return 0


Expand Down
16 changes: 16 additions & 0 deletions sklearn/neighbors/tests/test_kd_tree.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np
import pytest
from joblib import Parallel
from sklearn.utils.fixes import delayed

from sklearn.neighbors._kd_tree import KDTree

Expand All @@ -13,3 +15,17 @@ def test_array_object_type():
X = np.array([(1, 2, 3), (2, 5), (5, 5, 1, 2)], dtype=object)
with pytest.raises(ValueError, match="setting an array element with a sequence"):
KDTree(X)


def test_kdtree_picklable_with_joblib():
    """Check that KDTree queries still work once joblib memmaps the tree.

    Non-regression test for #21685 and #21228.
    """
    X = np.random.RandomState(0).random_sample((10, 3))
    kd_tree = KDTree(X, leaf_size=2)

    # Passing max_nbytes=1 to Parallel triggers read-only memory mapping,
    # which used to raise "ValueError: buffer source array is read-only"
    # in a previous version of the Cython code.
    runner = Parallel(n_jobs=2, max_nbytes=1)
    runner(delayed(kd_tree.query)(batch) for batch in [X, X])