diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 77ac981934468..059056275ef3d 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -19,7 +19,7 @@ from ..metrics.pairwise import paired_distances from ..metrics.pairwise import _VALID_METRICS from ..metrics import DistanceMetric -from ..metrics._dist_metrics import METRIC_MAPPING +from ..metrics._dist_metrics import METRIC_MAPPING64 from ..utils import check_array from ..utils._fast_dict import IntFloatDict from ..utils.graph import _fix_connected_components @@ -543,7 +543,7 @@ def linkage_tree( linkage == "single" and affinity != "precomputed" and not callable(affinity) - and affinity in METRIC_MAPPING + and affinity in METRIC_MAPPING64 ): # We need the fast cythonized metric from neighbors dist_metric = DistanceMetric.get_metric(affinity) diff --git a/sklearn/cluster/_hdbscan/_linkage.pyx b/sklearn/cluster/_hdbscan/_linkage.pyx index 0cb51a7b42992..03e91ac8d6833 100644 --- a/sklearn/cluster/_hdbscan/_linkage.pyx +++ b/sklearn/cluster/_hdbscan/_linkage.pyx @@ -35,7 +35,7 @@ cimport numpy as cnp from libc.float cimport DBL_MAX import numpy as np -from ...metrics._dist_metrics cimport DistanceMetric +from ...metrics._dist_metrics cimport DistanceMetric64 from ...cluster._hierarchical_fast cimport UnionFind from ...cluster._hdbscan._tree cimport HIERARCHY_t from ...cluster._hdbscan._tree import HIERARCHY_dtype @@ -111,7 +111,7 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_mutual_reachability( cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_data_matrix( const float64_t[:, ::1] raw_data, const float64_t[::1] core_distances, - DistanceMetric dist_metric, + DistanceMetric64 dist_metric, float64_t alpha=1.0 ): """Compute the Minimum Spanning Tree (MST) representation of the mutual- diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx index 7b996c3978c62..bf43454458e48 100644 --- a/sklearn/cluster/_hierarchical_fast.pyx +++ b/sklearn/cluster/_hierarchical_fast.pyx @@ -3,7 +3,7 @@ import numpy as np cimport cython -from ..metrics._dist_metrics cimport DistanceMetric +from ..metrics._dist_metrics cimport DistanceMetric64 from ..utils._fast_dict cimport IntFloatDict from ..utils._typedefs cimport float64_t, intp_t, uint8_t @@ -427,7 +427,7 @@ def single_linkage_label(L): # Implements MST-LINKAGE-CORE from https://arxiv.org/abs/1109.2378 def mst_linkage_core( const float64_t [:, ::1] raw_data, - DistanceMetric dist_metric): + DistanceMetric64 dist_metric): """ Compute the necessary elements of a minimum spanning tree for computation of single linkage clustering. This @@ -444,8 +444,8 @@ def mst_linkage_core( raw_data: array of shape (n_samples, n_features) The array of feature data to be clustered. Must be C-aligned - dist_metric: DistanceMetric - A DistanceMetric object conforming to the API from + dist_metric: DistanceMetric64 + A DistanceMetric64 object conforming to the API from ``sklearn.metrics._dist_metrics.pxd`` that will be used to compute distances. 
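The cluster-side hunks above are mechanical renames to the `64`-suffixed names introduced later in this patch. As a reviewer aid (not part of the patch), here is a minimal sketch of the routing that `linkage_tree` relies on, assuming the private `METRIC_MAPPING64` import path added above and the public dtype-aware `DistanceMetric.get_metric` entry point added below:

```python
# Illustration only: single linkage routes to the fast Cython metric when the
# affinity string is a key of the (private) METRIC_MAPPING64 registry.
from sklearn.metrics import DistanceMetric
from sklearn.metrics._dist_metrics import METRIC_MAPPING64  # private, per this patch

affinity = "euclidean"
if affinity in METRIC_MAPPING64:
    # dtype defaults to np.float64, so this resolves to the 64-bit specialization.
    dist_metric = DistanceMetric.get_metric(affinity)
    print(type(dist_metric).__name__)  # expected after this patch: 'EuclideanDistance64'
```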
diff --git a/sklearn/metrics/_dist_metrics.pxd.tp b/sklearn/metrics/_dist_metrics.pxd.tp index 511e0941a7098..60b8da3ecfa46 100644 --- a/sklearn/metrics/_dist_metrics.pxd.tp +++ b/sklearn/metrics/_dist_metrics.pxd.tp @@ -3,20 +3,8 @@ implementation_specific_values = [ # Values are the following ones: # - # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE - # - # On the first hand, an empty string is used for `name_suffix` - # for the float64 case as to still be able to expose the original - # float64 implementation under the same API, namely `DistanceMetric`. - # - # On the other hand, '32' is used for `name_suffix` for the float32 - # case to remove ambiguity and use `DistanceMetric32`, which is not - # publicly exposed. - # - # The metric mapping is adapted accordingly to route to the correct - # implementations. - # - ('', 'float64_t', 'np.float64'), + # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE + ('64', 'float64_t', 'np.float64'), ('32', 'float32_t', 'np.float32') ] @@ -25,6 +13,9 @@ from libc.math cimport sqrt, exp from ..utils._typedefs cimport float64_t, float32_t, int32_t, intp_t +cdef class DistanceMetric: + pass + {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} ###################################################################### @@ -68,7 +59,7 @@ cdef inline float64_t euclidean_rdist_to_dist{{name_suffix}}(const {{INPUT_DTYPE ###################################################################### # DistanceMetric{{name_suffix}} base class -cdef class DistanceMetric{{name_suffix}}: +cdef class DistanceMetric{{name_suffix}}(DistanceMetric): # The following attributes are required for a few of the subclasses. # we must define them here so that cython's limited polymorphism will work. # Because we don't expect to instantiate a lot of these objects, the diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index b3b9ade53b3d9..bc54e51a7511a 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -3,20 +3,8 @@ implementation_specific_values = [ # Values are the following ones: # - # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE - # - # On the first hand, an empty string is used for `name_suffix` - # for the float64 case as to still be able to expose the original - # float64 implementation under the same API, namely `DistanceMetric`. - # - # On the other hand, '32' bit is used for `name_suffix` for the float32 - # case to remove ambiguity and use `DistanceMetric32`, which is not - # publicly exposed. - # - # The metric mapping is adapted accordingly to route to the correct - # implementations. - # - ('', 'float64_t', 'np.float64'), + # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE + ('64', 'float64_t', 'np.float64'), ('32', 'float32_t', 'np.float32') ] @@ -73,9 +61,36 @@ def get_valid_metric_ids(L): >>> sorted(L) ['cityblock', 'euclidean', 'l1', 'l2', 'manhattan'] """ - return [key for (key, val) in METRIC_MAPPING.items() + return [key for (key, val) in METRIC_MAPPING64.items() if (val.__name__ in L) or (val in L)] +cdef class DistanceMetric: + @classmethod + def get_metric(cls, metric, dtype=np.float64, **kwargs): + """Get the given distance metric from the string identifier. + + See the docstring of DistanceMetric for a list of available metrics. 
+ + Parameters + ---------- + metric : str or class name + The distance metric to use + dtype : {np.float32, np.float64}, default=np.float64 + The dtype of the data on which the metric will be applied + **kwargs + additional arguments will be passed to the requested metric + """ + if dtype == np.float32: + specialized_class = DistanceMetric32 + elif dtype == np.float64: + specialized_class = DistanceMetric64 + else: + raise ValueError( + f"Unexpected dtype {dtype} provided. Please select a dtype from" + " {np.float32, np.float64}" + ) + + return specialized_class.get_metric(metric, **kwargs) {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} @@ -125,7 +140,7 @@ cdef {{INPUT_DTYPE_t}} INF{{name_suffix}} = np.inf ###################################################################### # Distance Metric Classes -cdef class DistanceMetric{{name_suffix}}: +cdef class DistanceMetric{{name_suffix}}(DistanceMetric): """DistanceMetric class This class provides a uniform interface to fast distance metric diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp index 23337cb2b59d6..fc56a59cab16f 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp @@ -3,17 +3,14 @@ implementation_specific_values = [ # Values are the following ones: # - # name_suffix, DistanceMetric, INPUT_DTYPE_t, INPUT_DTYPE - # - # We use DistanceMetric for float64 for backward naming compatibility. - # - ('64', 'DistanceMetric', 'float64_t'), + # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE + ('64', 'DistanceMetric64', 'float64_t'), ('32', 'DistanceMetric32', 'float32_t') ] }} from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t -from ...metrics._dist_metrics cimport DistanceMetric, DistanceMetric32 +from ...metrics._dist_metrics cimport DistanceMetric64, DistanceMetric32, DistanceMetric {{for name_suffix, DistanceMetric, INPUT_DTYPE_t in implementation_specific_values}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp index 5569c1f231d62..40a9a45e8b8e1 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp @@ -3,11 +3,8 @@ implementation_specific_values = [ # Values are the following ones: # - # name_suffix, DistanceMetric, INPUT_DTYPE_t, INPUT_DTYPE - # - # We use DistanceMetric for float64 for backward naming compatibility. 
- # - ('64', 'DistanceMetric', 'float64_t', 'np.float64'), + # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE + ('64', 'DistanceMetric64', 'float64_t', 'np.float64'), ('32', 'DistanceMetric32', 'float32_t', 'np.float32') ] @@ -17,7 +14,6 @@ import numpy as np from cython cimport final from ...utils._typedefs cimport float64_t, float32_t, intp_t -from ...metrics._dist_metrics cimport DistanceMetric from scipy.sparse import issparse, csr_matrix @@ -96,8 +92,9 @@ cdef class DatasetsPair{{name_suffix}}: if metric_kwargs is not None: metric_kwargs.pop("Y_norm_squared", None) cdef: - {{DistanceMetric}} distance_metric = {{DistanceMetric}}.get_metric( + {{DistanceMetric}} distance_metric = DistanceMetric.get_metric( metric, + {{INPUT_DTYPE}}, **(metric_kwargs or {}) ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index f72861bcbeb8c..5f4325af3a09f 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -6,7 +6,7 @@ from scipy.sparse import isspmatrix_csr, issparse -from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING +from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING64 from ._base import _sqeuclidean_row_norms32, _sqeuclidean_row_norms64 from ._argkmin import ( @@ -76,7 +76,7 @@ def valid_metrics(cls) -> List[str]: "hamming", *BOOL_METRICS, } - return sorted(({"sqeuclidean"} | set(METRIC_MAPPING.keys())) - excluded) + return sorted(({"sqeuclidean"} | set(METRIC_MAPPING64.keys())) - excluded) @classmethod def is_usable_for(cls, X, Y, metric) -> bool: diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index 598eb9b39357e..fc9b006e2fefd 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -11,8 +11,8 @@ from sklearn.metrics._dist_metrics import ( BOOL_METRICS, - # Unexposed private DistanceMetric for 32 bit DistanceMetric32, + DistanceMetric64, ) from sklearn.utils import check_random_state @@ -64,9 +64,6 @@ def dist_func(x1, x2, p): ) @pytest.mark.parametrize("X, Y", [(X64, Y64), (X32, Y32), (X_mmap, Y_mmap)]) def test_cdist(metric_param_grid, X, Y): - DistanceMetricInterface = ( - DistanceMetric if X.dtype == Y.dtype == np.float64 else DistanceMetric32 - ) metric, param_grid = metric_param_grid keys = param_grid.keys() X_csr, Y_csr = sp.csr_matrix(X), sp.csr_matrix(Y) @@ -83,7 +80,7 @@ def test_cdist(metric_param_grid, X, Y): D_scipy_cdist = cdist(X, Y, metric, **kwargs) - dm = DistanceMetricInterface.get_metric(metric, **kwargs) + dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs) # DistanceMetric.pairwise must be consistent for all # combinations of formats in {sparse, dense}. 
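The `test_cdist` hunk above, and the test hunks that follow, all apply the same substitution: instead of picking `DistanceMetric` or `DistanceMetric32` up front, the tests pass the data dtype to the single `DistanceMetric.get_metric` entry point. A standalone sketch (not part of the patch) of that pattern, comparing the two specializations on the same data:

```python
# Sketch: both specializations are obtained through one entry point and should
# agree on the same data up to float32 rounding.
import numpy as np
from sklearn.metrics import DistanceMetric

rng = np.random.RandomState(0)
X64 = rng.random_sample((10, 4))
X32 = X64.astype(np.float32)

dm64 = DistanceMetric.get_metric("euclidean", X64.dtype)
dm32 = DistanceMetric.get_metric("euclidean", X32.dtype)

np.testing.assert_allclose(dm64.pairwise(X64), dm32.pairwise(X32), rtol=1e-5)
```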
@@ -141,9 +138,6 @@ def test_cdist_bool_metric(metric, X_bool, Y_bool): ) @pytest.mark.parametrize("X", [X64, X32, X_mmap]) def test_pdist(metric_param_grid, X): - DistanceMetricInterface = ( - DistanceMetric if X.dtype == np.float64 else DistanceMetric32 - ) metric, param_grid = metric_param_grid keys = param_grid.keys() X_csr = sp.csr_matrix(X) @@ -160,7 +154,7 @@ def test_pdist(metric_param_grid, X): D_scipy_pdist = cdist(X, X, metric, **kwargs) - dm = DistanceMetricInterface.get_metric(metric, **kwargs) + dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs) D_sklearn = dm.pairwise(X) assert D_sklearn.flags.c_contiguous assert_allclose(D_sklearn, D_scipy_pdist, **rtol_dict) @@ -189,8 +183,8 @@ def test_distance_metrics_dtype_consistency(metric_param_grid): for vals in itertools.product(*param_grid.values()): kwargs = dict(zip(keys, vals)) - dm64 = DistanceMetric.get_metric(metric, **kwargs) - dm32 = DistanceMetric32.get_metric(metric, **kwargs) + dm64 = DistanceMetric.get_metric(metric, np.float64, **kwargs) + dm32 = DistanceMetric.get_metric(metric, np.float32, **kwargs) D64 = dm64.pairwise(X64) D32 = dm32.pairwise(X32) @@ -230,9 +224,6 @@ def test_pdist_bool_metrics(metric, X_bool): ) @pytest.mark.parametrize("X", [X64, X32]) def test_pickle(writable_kwargs, metric_param_grid, X): - DistanceMetricInterface = ( - DistanceMetric if X.dtype == np.float64 else DistanceMetric32 - ) metric, param_grid = metric_param_grid keys = param_grid.keys() for vals in itertools.product(*param_grid.values()): @@ -242,7 +233,7 @@ def test_pickle(writable_kwargs, metric_param_grid, X): if isinstance(val, np.ndarray): val.setflags(write=writable_kwargs) kwargs = dict(zip(keys, vals)) - dm = DistanceMetricInterface.get_metric(metric, **kwargs) + dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs) D1 = dm.pairwise(X) dm2 = pickle.loads(pickle.dumps(dm)) D2 = dm2.pairwise(X) @@ -261,10 +252,6 @@ def test_pickle_bool_metrics(metric, X_bool): @pytest.mark.parametrize("X, Y", [(X64, Y64), (X32, Y32), (X_mmap, Y_mmap)]) def test_haversine_metric(X, Y): - DistanceMetricInterface = ( - DistanceMetric if X.dtype == np.float64 else DistanceMetric32 - ) - # The Haversine DistanceMetric only works on 2 features. X = np.asarray(X[:, :2]) Y = np.asarray(Y[:, :2]) @@ -286,7 +273,7 @@ def haversine_slow(x1, x2): for j, yj in enumerate(Y): D_reference[i, j] = haversine_slow(xi, yj) - haversine = DistanceMetricInterface.get_metric("haversine") + haversine = DistanceMetric.get_metric("haversine", X.dtype) D_sklearn = haversine.pairwise(X, Y) assert_allclose( @@ -389,3 +376,26 @@ def test_minkowski_metric_validate_weights_size(): ) with pytest.raises(ValueError, match=msg): dm.pairwise(X64, Y64) + + +@pytest.mark.parametrize("metric, metric_kwargs", METRICS_DEFAULT_PARAMS) +@pytest.mark.parametrize("dtype", (np.float32, np.float64)) +def test_get_metric_dtype(metric, metric_kwargs, dtype): + specialized_cls = { + np.float32: DistanceMetric32, + np.float64: DistanceMetric64, + }[dtype] + + # We don't need the entire grid, just one for a sanity check + metric_kwargs = {k: v[0] for k, v in metric_kwargs.items()} + generic_type = type(DistanceMetric.get_metric(metric, dtype, **metric_kwargs)) + specialized_type = type(specialized_cls.get_metric(metric, **metric_kwargs)) + + assert generic_type is specialized_type + + +def test_get_metric_bad_dtype(): + dtype = np.int32 + msg = r"Unexpected dtype .* provided. 
Please select a dtype from" + with pytest.raises(ValueError, match=msg): + DistanceMetric.get_metric("manhattan", dtype) diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index 12824e9cb684e..8223c20991904 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -5,7 +5,6 @@ from ._ball_tree import BallTree from ._kd_tree import KDTree -from ._distance_metric import DistanceMetric from ._graph import kneighbors_graph, radius_neighbors_graph from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer from ._unsupervised import NearestNeighbors @@ -20,7 +19,6 @@ __all__ = [ "BallTree", - "DistanceMetric", "KDTree", "KNeighborsClassifier", "KNeighborsRegressor", diff --git a/sklearn/neighbors/_ball_tree.pyx b/sklearn/neighbors/_ball_tree.pyx index cf02bd37c5829..d9b933cb43c66 100644 --- a/sklearn/neighbors/_ball_tree.pyx +++ b/sklearn/neighbors/_ball_tree.pyx @@ -5,16 +5,26 @@ __all__ = ['BallTree'] DOC_DICT = {'BinaryTree': 'BallTree', 'binary_tree': 'ball_tree'} -VALID_METRICS = ['EuclideanDistance', 'SEuclideanDistance', - 'ManhattanDistance', 'ChebyshevDistance', - 'MinkowskiDistance', 'WMinkowskiDistance', - 'MahalanobisDistance', 'HammingDistance', - 'CanberraDistance', 'BrayCurtisDistance', - 'JaccardDistance', 'DiceDistance', - 'RogersTanimotoDistance', 'RussellRaoDistance', - 'SokalMichenerDistance', 'SokalSneathDistance', - 'PyFuncDistance', 'HaversineDistance'] - +VALID_METRICS = [ + 'BrayCurtisDistance64', + 'CanberraDistance64', + 'ChebyshevDistance64', + 'DiceDistance64', + 'EuclideanDistance64', + 'HammingDistance64', + 'HaversineDistance64', + 'JaccardDistance64', + 'MahalanobisDistance64', + 'ManhattanDistance64', + 'MinkowskiDistance64', + 'PyFuncDistance64', + 'RogersTanimotoDistance64', + 'RussellRaoDistance64', + 'SEuclideanDistance64', + 'SokalMichenerDistance64', + 'SokalSneathDistance64', + 'WMinkowskiDistance64', +] include "_binary_tree.pxi" @@ -129,7 +139,7 @@ cdef inline float64_t min_rdist(BinaryTree tree, intp_t i_node, float64_t* pt) except -1 nogil: """Compute the minimum reduced-distance between a point and a node""" if tree.euclidean: - return euclidean_dist_to_rdist(min_dist(tree, i_node, pt)) + return euclidean_dist_to_rdist64(min_dist(tree, i_node, pt)) else: return tree.dist_metric._dist_to_rdist(min_dist(tree, i_node, pt)) @@ -138,7 +148,7 @@ cdef inline float64_t max_rdist(BinaryTree tree, intp_t i_node, float64_t* pt) except -1: """Compute the maximum reduced-distance between a point and a node""" if tree.euclidean: - return euclidean_dist_to_rdist(max_dist(tree, i_node, pt)) + return euclidean_dist_to_rdist64(max_dist(tree, i_node, pt)) else: return tree.dist_metric._dist_to_rdist(max_dist(tree, i_node, pt)) @@ -167,8 +177,8 @@ cdef inline float64_t min_rdist_dual(BinaryTree tree1, intp_t i_node1, BinaryTree tree2, intp_t i_node2) except -1: """compute the minimum reduced distance between two nodes""" if tree1.euclidean: - return euclidean_dist_to_rdist(min_dist_dual(tree1, i_node1, - tree2, i_node2)) + return euclidean_dist_to_rdist64(min_dist_dual(tree1, i_node1, + tree2, i_node2)) else: return tree1.dist_metric._dist_to_rdist(min_dist_dual(tree1, i_node1, tree2, i_node2)) @@ -178,8 +188,8 @@ cdef inline float64_t max_rdist_dual(BinaryTree tree1, intp_t i_node1, BinaryTree tree2, intp_t i_node2) except -1: """compute the maximum reduced distance between two nodes""" if tree1.euclidean: - return euclidean_dist_to_rdist(max_dist_dual(tree1, i_node1, - tree2, i_node2)) + return 
euclidean_dist_to_rdist64(max_dist_dual(tree1, i_node1, + tree2, i_node2)) else: return tree1.dist_metric._dist_to_rdist(max_dist_dual(tree1, i_node1, tree2, i_node2)) diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi index 36fb03ce2afe5..7d4d08b2703a4 100644 --- a/sklearn/neighbors/_binary_tree.pxi +++ b/sklearn/neighbors/_binary_tree.pxi @@ -153,9 +153,10 @@ import warnings from ..metrics._dist_metrics cimport ( DistanceMetric, - euclidean_dist, - euclidean_rdist, - euclidean_dist_to_rdist, + DistanceMetric64, + euclidean_dist64, + euclidean_rdist64, + euclidean_dist_to_rdist64, ) from ._partition_nodes cimport partition_node_indices @@ -231,7 +232,7 @@ leaf_size : positive int, default=40 satisfy ``leaf_size <= n_points <= 2 * leaf_size``, except in the case that ``n_samples < leaf_size``. -metric : str or DistanceMetric object, default='minkowski' +metric : str or DistanceMetric64 object, default='minkowski' Metric to use for distance computation. Default is "minkowski", which results in the standard Euclidean distance when p = 2. A list of valid metrics for {BinaryTree} is given by @@ -782,7 +783,7 @@ cdef class BinaryTree: cdef intp_t n_levels cdef intp_t n_nodes - cdef DistanceMetric dist_metric + cdef DistanceMetric64 dist_metric cdef int euclidean # variables to keep track of building & querying stats @@ -832,7 +833,7 @@ cdef class BinaryTree: self.dist_metric = DistanceMetric.get_metric(metric, **kwargs) self.euclidean = (self.dist_metric.__class__.__name__ - == 'EuclideanDistance') + == 'EuclideanDistance64') metric = self.dist_metric.__class__.__name__ if metric not in VALID_METRICS: @@ -922,7 +923,7 @@ cdef class BinaryTree: sample_weight = state[12] self.euclidean = (self.dist_metric.__class__.__name__ - == 'EuclideanDistance') + == 'EuclideanDistance64') n_samples = self.data.shape[0] self._update_sample_weight(n_samples, sample_weight) @@ -996,7 +997,7 @@ cdef class BinaryTree: """Compute the distance between arrays x1 and x2""" self.n_calls += 1 if self.euclidean: - return euclidean_dist(x1, x2, size) + return euclidean_dist64(x1, x2, size) else: return self.dist_metric.dist(x1, x2, size) @@ -1011,7 +1012,7 @@ cdef class BinaryTree: """ self.n_calls += 1 if self.euclidean: - return euclidean_rdist(x1, x2, size) + return euclidean_rdist64(x1, x2, size) else: return self.dist_metric.rdist(x1, x2, size) diff --git a/sklearn/neighbors/_distance_metric.py b/sklearn/neighbors/_distance_metric.py deleted file mode 100644 index 9bfd131c482d8..0000000000000 --- a/sklearn/neighbors/_distance_metric.py +++ /dev/null @@ -1,22 +0,0 @@ -# TODO: Remove this file in 1.3 -import warnings - -from ..metrics import DistanceMetric as _DistanceMetric - - -class DistanceMetric(_DistanceMetric): - @classmethod - def _warn(cls): - warnings.warn( - ( - "sklearn.neighbors.DistanceMetric has been moved " - "to sklearn.metrics.DistanceMetric in 1.0. 
" - "This import path will be removed in 1.3" - ), - category=FutureWarning, - ) - - @classmethod - def get_metric(cls, metric, **kwargs): - DistanceMetric._warn() - return _DistanceMetric.get_metric(metric, **kwargs) diff --git a/sklearn/neighbors/_kd_tree.pyx b/sklearn/neighbors/_kd_tree.pyx index ece293229fa10..f5cd2617be147 100644 --- a/sklearn/neighbors/_kd_tree.pyx +++ b/sklearn/neighbors/_kd_tree.pyx @@ -6,8 +6,8 @@ __all__ = ['KDTree'] DOC_DICT = {'BinaryTree': 'KDTree', 'binary_tree': 'kd_tree'} -VALID_METRICS = ['EuclideanDistance', 'ManhattanDistance', - 'ChebyshevDistance', 'MinkowskiDistance'] +VALID_METRICS = ['EuclideanDistance64', 'ManhattanDistance64', + 'ChebyshevDistance64', 'MinkowskiDistance64'] include "_binary_tree.pxi" diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 118021af90750..6a285b9fdce3f 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -2120,18 +2120,8 @@ def test_auto_algorithm(X, metric, metric_params, expected_algo): assert model._fit_method == expected_algo -# TODO: Remove in 1.3 -def test_neighbors_distance_metric_deprecation(): - from sklearn.neighbors import DistanceMetric - from sklearn.metrics import DistanceMetric as ActualDistanceMetric - - msg = r"This import path will be removed in 1\.3" - with pytest.warns(FutureWarning, match=msg): - dist_metric = DistanceMetric.get_metric("euclidean") - - assert isinstance(dist_metric, ActualDistanceMetric) - - +# TODO: Remove filterwarnings in 1.3 when wminkowski is removed +@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn") @pytest.mark.parametrize( "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"])) )