From f4f33dc662eb090e385ac6639bae75a5c5e93cb2 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 29 Sep 2022 10:05:52 +0200 Subject: [PATCH 1/6] Reword 'radius_neighborhood' for 'radius_neigbors' --- .gitignore | 4 +-- setup.cfg | 4 +-- setup.py | 2 +- .../_dispatcher.py | 2 +- ...orhood.pxd.tp => _radius_neighbors.pxd.tp} | 0 ...orhood.pyx.tp => _radius_neighbors.pyx.tp} | 0 .../_pairwise_distances_reduction/setup.py | 6 ++-- .../test_pairwise_distances_reduction.py | 28 +++++++++---------- sklearn/neighbors/tests/test_neighbors.py | 4 +-- 9 files changed, 25 insertions(+), 25 deletions(-) rename sklearn/metrics/_pairwise_distances_reduction/{_radius_neighborhood.pxd.tp => _radius_neighbors.pxd.tp} (100%) rename sklearn/metrics/_pairwise_distances_reduction/{_radius_neighborhood.pyx.tp => _radius_neighbors.pyx.tp} (100%) diff --git a/.gitignore b/.gitignore index 24f562af3df15..68299a202b7c7 100644 --- a/.gitignore +++ b/.gitignore @@ -95,5 +95,5 @@ sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx -sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd -sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx +sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd +sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx diff --git a/setup.cfg b/setup.cfg index 81fbbffadb233..3aa89052a4b8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -79,8 +79,8 @@ ignore = sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx - sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd - sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx + sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd + sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx [codespell] diff --git a/setup.py b/setup.py index a22df6e647a8e..cc5b6f11749cb 100755 --- a/setup.py +++ b/setup.py @@ -92,7 +92,7 @@ "sklearn.metrics._pairwise_distances_reduction._gemm_term_computer", "sklearn.metrics._pairwise_distances_reduction._base", "sklearn.metrics._pairwise_distances_reduction._argkmin", - "sklearn.metrics._pairwise_distances_reduction._radius_neighborhood", + "sklearn.metrics._pairwise_distances_reduction._radius_neighbors", "sklearn.metrics._pairwise_fast", "sklearn.neighbors._partition_nodes", "sklearn.tree._splitter", diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 809d683b52ced..d9875c9d0fc6d 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -16,7 +16,7 @@ ArgKmin64, ArgKmin32, ) -from ._radius_neighborhood import ( +from ._radius_neighbors import ( RadiusNeighbors64, RadiusNeighbors32, ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp similarity index 100% rename from sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp rename to sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp similarity index 100% rename from sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp rename to sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp diff --git a/sklearn/metrics/_pairwise_distances_reduction/setup.py b/sklearn/metrics/_pairwise_distances_reduction/setup.py index f55ec659b5821..e1fbbceea7eb8 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/setup.py +++ b/sklearn/metrics/_pairwise_distances_reduction/setup.py @@ -21,8 +21,8 @@ def configuration(parent_package="", top_path=None): "sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp", "sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp", "sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp", - "sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp", - "sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp", + "sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp", + "sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp", ] gen_from_templates(templates) @@ -32,7 +32,7 @@ def configuration(parent_package="", top_path=None): "_gemm_term_computer.pyx", "_base.pyx", "_argkmin.pyx", - "_radius_neighborhood.pyx", + "_radius_neighbors.pyx", ] for source_file in cython_sources: diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index 52f5f3e73948c..c4c2dd55af30b 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -185,7 +185,7 @@ def assert_argkmin_results_quasi_equality( ), msg -def assert_radius_neighborhood_results_equality( +def assert_radius_neighbors_results_equality( ref_dist, dist, ref_indices, indices, radius ): # We get arrays of arrays and we need to check for individual pairs @@ -204,7 +204,7 @@ def assert_radius_neighborhood_results_equality( ) -def assert_radius_neighborhood_results_quasi_equality( +def assert_radius_neighbors_results_quasi_equality( ref_dist, dist, ref_indices, @@ -308,7 +308,7 @@ def assert_radius_neighborhood_results_quasi_equality( ( RadiusNeighbors, np.float64, - ): assert_radius_neighborhood_results_equality, + ): assert_radius_neighbors_results_equality, # In the case of 32bit, indices can be permuted due to small difference # in the computations of their associated distances, hence we test equality of # results up to valid permutations. @@ -316,7 +316,7 @@ def assert_radius_neighborhood_results_quasi_equality( ( RadiusNeighbors, np.float32, - ): assert_radius_neighborhood_results_quasi_equality, + ): assert_radius_neighbors_results_quasi_equality, } @@ -404,7 +404,7 @@ def test_assert_argkmin_results_quasi_equality(): ) -def test_assert_radius_neighborhood_results_quasi_equality(): +def test_assert_radius_neighbors_results_quasi_equality(): rtol = 1e-7 eps = 1e-7 @@ -425,7 +425,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): ] # Sanity check: compare the reference results to themselves. - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( ref_dist, ref_dist, ref_indices, @@ -435,7 +435,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): ) # Apply valid permutation on indices - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -443,7 +443,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): radius=6.1, rtol=rtol, ) - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([_1m, _1m, 1, _1p, _1p])]), np.array([np.array([_1m, _1m, 1, _1p, _1p])]), np.array([np.array([6, 7, 8, 9, 10])]), @@ -455,7 +455,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): # Apply invalid permutation on indices msg = "Neighbors indices for query 0 are not matching" with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -465,7 +465,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): ) # Having extra last elements is valid if they are in: [radius ± rtol] - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -479,7 +479,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): "The last extra elements ([6.]) aren't in [radius ± rtol]=[6.1 ± 1e-07]" ) with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, 6])]), np.array([np.array([1.2, 2.5])]), np.array([np.array([1, 2, 3])]), @@ -491,7 +491,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): # Indices aren't properly sorted w.r.t their distances msg = "Neighbors indices for query 0 are not matching" with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -503,7 +503,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): # Distances aren't properly sorted msg = "Distances aren't sorted on row 0" with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([2.5, 1.2, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -631,7 +631,7 @@ def test_argkmin_factory_method_wrong_usages(): ) -def test_radius_neighborhood_factory_method_wrong_usages(): +def test_radius_neighbors_factory_method_wrong_usages(): rng = np.random.RandomState(1) X = rng.rand(100, 10) Y = rng.rand(100, 10) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 0a75b57cc60ae..4cea0bbb00a5f 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -29,7 +29,7 @@ from sklearn.metrics.pairwise import pairwise_distances from sklearn.metrics.tests.test_dist_metrics import BOOL_METRICS from sklearn.metrics.tests.test_pairwise_distances_reduction import ( - assert_radius_neighborhood_results_equality, + assert_radius_neighbors_results_equality, ) from sklearn.model_selection import cross_val_score from sklearn.model_selection import train_test_split @@ -2174,7 +2174,7 @@ def test_radius_neighbors_brute_backend( X_test, return_distance=True ) - assert_radius_neighborhood_results_equality( + assert_radius_neighbors_results_equality( legacy_brute_dst, pdr_brute_dst, legacy_brute_idx, From e002bc7327c952ae1fbde558f4f647c80cbeb501 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 29 Sep 2022 10:09:41 +0200 Subject: [PATCH 2/6] Reword to use BaseDistancesReduction --- .../_pairwise_distances_reduction/__init__.py | 10 +++--- .../_argkmin.pxd.tp | 6 ++-- .../_argkmin.pyx.tp | 6 ++-- .../_base.pxd.tp | 2 +- .../_base.pyx.tp | 4 +-- .../_datasets_pair.pyx.tp | 2 +- .../_dispatcher.py | 8 ++--- .../_radius_neighbors.pxd.tp | 6 ++-- .../_radius_neighbors.pyx.tp | 6 ++-- .../test_pairwise_distances_reduction.py | 32 +++++++++---------- 10 files changed, 41 insertions(+), 41 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py index 1aebb8bc4a572..db21d066ab1a6 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -32,7 +32,7 @@ # # Dispatchers are meant to be used in the Python code. Under the hood, a # dispatcher must only define the logic to choose at runtime to the correct -# dtype-specialized :class:`BaseDistanceReductionDispatcher` implementation based +# dtype-specialized :class:`BaseDistancesReductionDispatcher` implementation based # on the dtype of X and of Y. # # @@ -46,7 +46,7 @@ # # # (base dispatcher) -# BaseDistanceReductionDispatcher +# BaseDistancesReductionDispatcher # ∆ # | # | @@ -57,7 +57,7 @@ # | | # | | # | (64bit implem.) | -# | BaseDistanceReducer{32,64} | +# | BaseDistancesReduction{32,64} | # | ∆ | # | | | # | | | @@ -87,14 +87,14 @@ from ._dispatcher import ( - BaseDistanceReductionDispatcher, + BaseDistancesReductionDispatcher, ArgKmin, RadiusNeighbors, sqeuclidean_row_norms, ) __all__ = [ - "BaseDistanceReductionDispatcher", + "BaseDistancesReductionDispatcher", "ArgKmin", "RadiusNeighbors", "sqeuclidean_row_norms", diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp index 37dd7f5836857..0f8b5a756bbfd 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp @@ -21,11 +21,11 @@ cnp.import_array() {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} -from ._base cimport BaseDistanceReducer{{name_suffix}} +from ._base cimport BaseDistancesReduction{{name_suffix}} from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): - """{{name_suffix}}bit implementation of BaseDistanceReducer{{name_suffix}} for the `ArgKmin` reduction.""" +cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): + """{{name_suffix}}bit implementation of BaseDistancesReduction{{name_suffix}} for the `ArgKmin` reduction.""" cdef: ITYPE_t k diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp index 1c1459e27f210..bf948b5d67f6e 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp @@ -40,7 +40,7 @@ cnp.import_array() {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} from ._base cimport ( - BaseDistanceReducer{{name_suffix}}, + BaseDistancesReduction{{name_suffix}}, _sqeuclidean_row_norms{{name_suffix}}, ) @@ -52,8 +52,8 @@ from ._datasets_pair cimport ( from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): - """{{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistanceReducer{{name_suffix}}.""" +cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): + """{{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistancesReduction{{name_suffix}}.""" @classmethod def compute( diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp index 77a689a9e4e94..24aeff4013c69 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp @@ -30,7 +30,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}( ITYPE_t num_threads, ) -cdef class BaseDistanceReducer{{name_suffix}}: +cdef class BaseDistancesReduction{{name_suffix}}: """ Base {{name_suffix}}bit implementation template of the pairwise-distances reduction backend. diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index d0c06de0f8761..c6b344c66004a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -105,7 +105,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( from ._datasets_pair cimport DatasetsPair{{name_suffix}} -cdef class BaseDistanceReducer{{name_suffix}}: +cdef class BaseDistancesReduction{{name_suffix}}: """ Base {{name_suffix}}bit implementation template of the pairwise-distances reduction backend. @@ -341,7 +341,7 @@ cdef class BaseDistanceReducer{{name_suffix}}: ) nogil: """Compute the pairwise distances on two chunks of X and Y and reduce them. - This is THE core computational method of BaseDistanceReducer{{name_suffix}}. + This is THE core computational method of BaseDistancesReduction{{name_suffix}}. This must be implemented in subclasses agnostically from the parallelization strategies. """ diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp index cfa37a004f17a..78857341f9c97 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp @@ -35,7 +35,7 @@ cdef class DatasetsPair{{name_suffix}}: The handling of parallelization over chunks to compute the distances and aggregation for several rows at a time is done in dedicated - subclasses of :class:`BaseDistanceReductionDispatcher` that in-turn rely on + subclasses of :class:`BaseDistancesReductionDispatcher` that in-turn rely on subclasses of :class:`DatasetsPair` for each pair of rows in the data. The goal is to make it possible to decouple the generic parallelization and aggregation logic from metric-specific computation as much as possible. diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index d9875c9d0fc6d..140a0a81b2a06 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -51,10 +51,10 @@ def sqeuclidean_row_norms(X, num_threads): ) -class BaseDistanceReductionDispatcher: +class BaseDistancesReductionDispatcher: """Abstract base dispatcher for pairwise distance computation & reduction. - Each dispatcher extending the base :class:`BaseDistanceReductionDispatcher` + Each dispatcher extending the base :class:`BaseDistancesReductionDispatcher` dispatcher must implement the :meth:`compute` classmethod. """ @@ -165,7 +165,7 @@ def compute( """ -class ArgKmin(BaseDistanceReductionDispatcher): +class ArgKmin(BaseDistancesReductionDispatcher): """Compute the argkmin of row vectors of X on the ones of Y. For each row vector of X, computes the indices of k first the rows @@ -309,7 +309,7 @@ def compute( ) -class RadiusNeighbors(BaseDistanceReductionDispatcher): +class RadiusNeighbors(BaseDistancesReductionDispatcher): """Compute radius-based neighbors for two sets of vectors. For each row-vector X[i] of the queries X, find all the indices j of diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp index 9bbe186589f20..631211186d7d5 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp @@ -43,12 +43,12 @@ cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( ##################### {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} -from ._base cimport BaseDistanceReducer{{name_suffix}} +from ._base cimport BaseDistancesReduction{{name_suffix}} from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): +cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): """ - {{name_suffix}}bit implementation of BaseDistanceReducer{{name_suffix}} for the + {{name_suffix}}bit implementation of BaseDistancesReduction{{name_suffix}} for the `RadiusNeighbors` reduction. """ diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index 8d50a6e04abf9..c157d7ba877ce 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -59,7 +59,7 @@ cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} from ._base cimport ( - BaseDistanceReducer{{name_suffix}}, + BaseDistancesReduction{{name_suffix}}, _sqeuclidean_row_norms{{name_suffix}} ) @@ -71,9 +71,9 @@ from ._datasets_pair cimport ( from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): +cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): """ - {{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistanceReducer{{name_suffix}} for the + {{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistancesReduction{{name_suffix}} for the `RadiusNeighbors` reduction. """ diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index c4c2dd55af30b..f929a55105509 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -10,7 +10,7 @@ from scipy.spatial.distance import cdist from sklearn.metrics._pairwise_distances_reduction import ( - BaseDistanceReductionDispatcher, + BaseDistancesReductionDispatcher, ArgKmin, RadiusNeighbors, sqeuclidean_row_norms, @@ -522,33 +522,33 @@ def test_pairwise_distances_reduction_is_usable_for(): metric = "manhattan" # Must be usable for all possible pair of {dense, sparse} datasets - assert BaseDistanceReductionDispatcher.is_usable_for(X, Y, metric) - assert BaseDistanceReductionDispatcher.is_usable_for(X_csr, Y_csr, metric) - assert BaseDistanceReductionDispatcher.is_usable_for(X_csr, Y, metric) - assert BaseDistanceReductionDispatcher.is_usable_for(X, Y_csr, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X, Y, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X_csr, Y_csr, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X_csr, Y, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X, Y_csr, metric) - assert BaseDistanceReductionDispatcher.is_usable_for( + assert BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.float64), Y.astype(np.float64), metric ) - assert BaseDistanceReductionDispatcher.is_usable_for( + assert BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.float32), Y.astype(np.float32), metric ) - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.int64), Y.astype(np.int64), metric ) - assert not BaseDistanceReductionDispatcher.is_usable_for(X, Y, metric="pyfunc") - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for(X, Y, metric="pyfunc") + assert not BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.float32), Y, metric ) - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X, Y.astype(np.int32), metric ) # F-ordered arrays are not supported - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( np.asfortranarray(X), Y, metric ) @@ -558,17 +558,17 @@ def test_pairwise_distances_reduction_is_usable_for(): # See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669 # noqa # TODO: implement specialisation for (sq)euclidean on fused sparse-dense # using sparse-dense routines for matrix-vector multiplications. - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X_csr, Y, metric="euclidean" ) - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X_csr, Y_csr, metric="sqeuclidean" ) # CSR matrices without non-zeros elements aren't currently supported # TODO: support CSR matrices without non-zeros elements X_csr_0_nnz = csr_matrix(X * 0) - assert not BaseDistanceReductionDispatcher.is_usable_for(X_csr_0_nnz, Y, metric) + assert not BaseDistancesReductionDispatcher.is_usable_for(X_csr_0_nnz, Y, metric) # CSR matrices with int64 indices and indptr (e.g. large nnz, or large n_features) # aren't supported as of now. @@ -576,7 +576,7 @@ def test_pairwise_distances_reduction_is_usable_for(): # TODO: support CSR matrices with int64 indices and indptr X_csr_int64 = csr_matrix(X) X_csr_int64.indices = X_csr_int64.indices.astype(np.int64) - assert not BaseDistanceReductionDispatcher.is_usable_for(X_csr_int64, Y, metric) + assert not BaseDistancesReductionDispatcher.is_usable_for(X_csr_int64, Y, metric) def test_argkmin_factory_method_wrong_usages(): From 87057b118aed57cddb8033544eb7af33e29231a4 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 29 Sep 2022 10:38:23 +0200 Subject: [PATCH 3/6] Clarify comments regarding float32 and float64 --- .../_pairwise_distances_reduction/__init__.py | 8 +-- .../_argkmin.pxd.tp | 4 +- .../_argkmin.pyx.tp | 4 +- .../_base.pxd.tp | 4 +- .../_base.pyx.tp | 60 +++++++++++++++---- .../_gemm_term_computer.pxd.tp | 2 +- .../_gemm_term_computer.pyx.tp | 2 +- .../_radius_neighbors.pxd.tp | 7 +-- .../_radius_neighbors.pyx.tp | 7 +-- 9 files changed, 63 insertions(+), 35 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py index db21d066ab1a6..133c854682f0c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -56,7 +56,7 @@ # ArgKmin RadiusNeighbors # | | # | | -# | (64bit implem.) | +# | (float{32,64} implem.) | # | BaseDistancesReduction{32,64} | # | ∆ | # | | | @@ -74,9 +74,9 @@ # x | | x # EuclideanArgKmin{32,64} EuclideanRadiusNeighbors{32,64} # -# For instance :class:`ArgKmin`, dispatches to both :class:`ArgKmin64` -# and :class:`ArgKmin32` if X and Y are both dense NumPy arrays with a `float64` -# or `float32` dtype respectively. +# For instance :class:`ArgKmin` dispatches to: +# - :class:`ArgKmin64` if X and Y are two `float64` array-likes +# - :class:`ArgKmin32` if X and Y are two `float32` array-likes # # In addition, if the metric parameter is set to "euclidean" or "sqeuclidean", # then `ArgKmin{32,64}` further dispatches to `EuclideanArgKmin{32,64}`. For diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp index 0f8b5a756bbfd..bf304beb52891 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp @@ -25,7 +25,7 @@ from ._base cimport BaseDistancesReduction{{name_suffix}} from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): - """{{name_suffix}}bit implementation of BaseDistancesReduction{{name_suffix}} for the `ArgKmin` reduction.""" + """float{{name_suffix}} implementation of the ArgKmin.""" cdef: ITYPE_t k @@ -39,7 +39,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): - """EuclideanDistance-specialized {{name_suffix}}bit implementation of ArgKmin{{name_suffix}}.""" + """EuclideanDistance-specialisation of ArgKmin{{name_suffix}}.""" cdef: GEMMTermComputer{{name_suffix}} gemm_term_computer const DTYPE_t[::1] X_norm_squared diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp index bf948b5d67f6e..1189f1f5cd682 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp @@ -53,7 +53,7 @@ from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): - """{{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistancesReduction{{name_suffix}}.""" + """float{{name_suffix}} implementation of the ArgKmin.""" @classmethod def compute( @@ -328,7 +328,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): - """EuclideanDistance-specialized implementation for ArgKmin{{name_suffix}}.""" + """EuclideanDistance-specialisation of ArgKmin{{name_suffix}}.""" @classmethod def is_usable_for(cls, X, Y, metric) -> bool: diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp index 24aeff4013c69..b5e4261c2217a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp @@ -32,8 +32,8 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}( cdef class BaseDistancesReduction{{name_suffix}}: """ - Base {{name_suffix}}bit implementation template of the pairwise-distances reduction - backend. + Base float{{name_suffix}} implementation template of the pairwise-distances + reduction backends. Implementations inherit from this template and may override the several defined hooks as needed in order to easily extend functionality with diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index c6b344c66004a..aeb28e8103b6a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -82,7 +82,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( ITYPE_t d = X.shape[1] DTYPE_t[::1] squared_row_norms = np.empty(n, dtype=DTYPE) - # To upcast the i-th row of X from 32bit to 64bit + # To upcast the i-th row of X from float32 to float64 vector[vector[DTYPE_t]] X_i_upcast = vector[vector[DTYPE_t]]( num_threads, vector[DTYPE_t](d) ) @@ -90,7 +90,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( with nogil, parallel(num_threads=num_threads): thread_num = openmp.omp_get_thread_num() for i in prange(n, schedule='static'): - # Upcasting the i-th row of X from 32bit to 64bit + # Upcasting the i-th row of X from float32 to float64 for j in range(d): X_i_upcast[thread_num][j] = deref(X_ptr + i * d + j) @@ -107,8 +107,8 @@ from ._datasets_pair cimport DatasetsPair{{name_suffix}} cdef class BaseDistancesReduction{{name_suffix}}: """ - Base {{name_suffix}}bit implementation template of the pairwise-distances reduction - backend. + Base float{{name_suffix}} implementation template of the pairwise-distances + reduction backends. Implementations inherit from this template and may override the several defined hooks as needed in order to easily extend functionality with @@ -224,7 +224,6 @@ cdef class BaseDistancesReduction{{name_suffix}}: X_end = X_start + self.X_n_samples_chunk # Reinitializing thread datastructures for the new X chunk - # If necessary, upcast X[X_start:X_end] to 64bit self._parallel_on_X_init_chunk(thread_num, X_start, X_end) for Y_chunk_idx in range(self.Y_n_chunks): @@ -234,7 +233,6 @@ cdef class BaseDistancesReduction{{name_suffix}}: else: Y_end = Y_start + self.Y_n_samples_chunk - # If necessary, upcast Y[Y_start:Y_end] to 64bit self._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, @@ -295,7 +293,6 @@ cdef class BaseDistancesReduction{{name_suffix}}: thread_num = _openmp_thread_num() # Initializing datastructures used in this thread - # If necessary, upcast X[X_start:X_end] to 64bit self._parallel_on_Y_parallel_init(thread_num, X_start, X_end) for Y_chunk_idx in prange(self.Y_n_chunks, schedule='static'): @@ -305,7 +302,6 @@ cdef class BaseDistancesReduction{{name_suffix}}: else: Y_end = Y_start + self.Y_n_samples_chunk - # If necessary, upcast Y[Y_start:Y_end] to 64bit self._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, @@ -373,7 +369,18 @@ cdef class BaseDistancesReduction{{name_suffix}}: ITYPE_t X_start, ITYPE_t X_end, ) nogil: - """Initialize datastructures used in a thread given its number.""" + """Initialize datastructures used in a thread given its number. + + EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ + in this method call: + + self.gemm_term_computer._parallel_on_X_init_chunk( + thread_num, X_start, X_end, + ) + + to ensure for the float32 case, the proper upcast of X[X_start:X_end] to + dedicated float64 buffers prior the reduction. + """ return cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( @@ -386,7 +393,15 @@ cdef class BaseDistancesReduction{{name_suffix}}: ) nogil: """Initialize datastructures just before the _compute_and_reduce_distances_on_chunks. - This is eventually used to upcast X[X_start:X_end] to 64bit. + EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ + in this method call: + + self.gemm_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + X_start, X_end, Y_start, Y_end, thread_num, + ) + + to ensure for the float32 case, the proper upcast of Y[Y_start:Y_end] to + dedicated float64 buffers prior the reduction. """ return @@ -418,7 +433,18 @@ cdef class BaseDistancesReduction{{name_suffix}}: ITYPE_t X_start, ITYPE_t X_end, ) nogil: - """Initialize datastructures used in a thread given its number.""" + """Initialize datastructures used in a thread given its number. + + EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ + in this method call: + + self.gemm_term_computer._parallel_on_Y_parallel_init( + thread_num, X_start, X_end, + ) + + to ensure for the float32 case, the proper upcast of X[X_start:X_end] to + dedicated float64 buffers prior the reduction. + """ return cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( @@ -431,8 +457,16 @@ cdef class BaseDistancesReduction{{name_suffix}}: ) nogil: """Initialize datastructures just before the _compute_and_reduce_distances_on_chunks. - This is eventually used to upcast Y[Y_start:Y_end] to 64bit. - """ + EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ + in this method call: + + self.gemm_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + X_start, X_end, Y_start, Y_end, thread_num, + ) + + to ensure for the float32 case, the proper upcast of Y[Y_start:Y_end] to + dedicated float64 buffers prior the reduction. + """"" return cdef void _parallel_on_Y_synchronize( diff --git a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp index 5978cfee9ebee..f0599185a1b64 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp @@ -35,7 +35,7 @@ cdef class GEMMTermComputer{{name_suffix}}: vector[vector[DTYPE_t]] dist_middle_terms_chunks {{if upcast_to_float64}} - # Buffers for upcasting chunks of X and Y from 32bit to 64bit + # Buffers for upcasting chunks of X and Y from float32 to float64 vector[vector[DTYPE_t]] X_c_upcast vector[vector[DTYPE_t]] Y_c_upcast {{endif}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp index 35e57219a96a7..37c482c961627 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp @@ -66,7 +66,7 @@ cdef class GEMMTermComputer{{name_suffix}}: self.dist_middle_terms_chunks = vector[vector[DTYPE_t]](self.effective_n_threads) {{if upcast_to_float64}} - # We populate the buffer for upcasting chunks of X and Y from 32bit to 64bit. + # We populate the buffer for upcasting chunks of X and Y from float32 to float64. self.X_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) self.Y_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp index 631211186d7d5..2d72c8d88a4c0 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp @@ -47,10 +47,7 @@ from ._base cimport BaseDistancesReduction{{name_suffix}} from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): - """ - {{name_suffix}}bit implementation of BaseDistancesReduction{{name_suffix}} for the - `RadiusNeighbors` reduction. - """ + """float{{name_suffix}} implementation of the RadiusNeighbors.""" cdef: DTYPE_t radius @@ -97,7 +94,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix}}): - """EuclideanDistance-specialized {{name_suffix}}bit implementation for RadiusNeighbors{{name_suffix}}.""" + """EuclideanDistance-specialisation of RadiusNeighbors{{name_suffix}}.""" cdef: GEMMTermComputer{{name_suffix}} gemm_term_computer const DTYPE_t[::1] X_norm_squared diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index c157d7ba877ce..1890d60b20b20 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -72,10 +72,7 @@ from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): - """ - {{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistancesReduction{{name_suffix}} for the - `RadiusNeighbors` reduction. - """ + """float{{name_suffix}} implementation of the RadiusNeighbors.""" @classmethod def compute( @@ -335,7 +332,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix}}): - """EuclideanDistance-specialized implementation for RadiusNeighbors{{name_suffix}}.""" + """EuclideanDistance-specialisation of RadiusNeighbors{{name_suffix}}.""" @classmethod def is_usable_for(cls, X, Y, metric) -> bool: From d275dcaacee8b0de2c1b4bd7785f61f95fc6209d Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 13 Oct 2022 09:13:27 +0200 Subject: [PATCH 4/6] DOC Reword comments about float64 upcast Co-authored-by: Olivier Grisel --- .../metrics/_pairwise_distances_reduction/_base.pyx.tp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index aeb28e8103b6a..8fff23ef376be 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -442,8 +442,9 @@ cdef class BaseDistancesReduction{{name_suffix}}: thread_num, X_start, X_end, ) - to ensure for the float32 case, the proper upcast of X[X_start:X_end] to - dedicated float64 buffers prior the reduction. + to ensure the proper upcast of X[X_start:X_end] to float64 prior + to the reduction with float64 accumulator buffers when X.dtype is + float32. """ return @@ -464,8 +465,9 @@ cdef class BaseDistancesReduction{{name_suffix}}: X_start, X_end, Y_start, Y_end, thread_num, ) - to ensure for the float32 case, the proper upcast of Y[Y_start:Y_end] to - dedicated float64 buffers prior the reduction. + to ensure the proper upcast of Y[Y_start:Y_end] to float64 prior + to the reduction with float64 accumulator buffers when Y.dtype is + float32. """"" return From 16437ff571592f700e000000e305aa24687d72d5 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 13 Oct 2022 09:13:27 +0200 Subject: [PATCH 5/6] fixup! DOC Reword comments about float64 upcast Co-authored-by: Olivier Grisel Co-authored-by: Thomas J. Fan --- .../_base.pyx.tp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index 21e268bd1ae58..ba0d043b355ed 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -356,15 +356,16 @@ cdef class BaseDistancesReduction{{name_suffix}}: ) nogil: """Initialize datastructures used in a thread given its number. - EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ - in this method call: + In this method, EuclideanDistance specialisations of subclass of + BaseDistancesReduction _must_ call: self.gemm_term_computer._parallel_on_X_init_chunk( thread_num, X_start, X_end, ) - to ensure for the float32 case, the proper upcast of X[X_start:X_end] to - dedicated float64 buffers prior the reduction. + to ensure the proper upcast of X[X_start:X_end] to float64 prior + to the reduction with float64 accumulator buffers when X.dtype is + float32. """ return @@ -378,15 +379,16 @@ cdef class BaseDistancesReduction{{name_suffix}}: ) nogil: """Initialize datastructures just before the _compute_and_reduce_distances_on_chunks. - EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ - in this method call: + In this method, EuclideanDistance specialisations of subclass of + BaseDistancesReduction _must_ call: self.gemm_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num, ) - to ensure for the float32 case, the proper upcast of Y[Y_start:Y_end] to - dedicated float64 buffers prior the reduction. + to ensure the proper upcast of Y[Y_start:Y_end] to float64 prior + to the reduction with float64 accumulator buffers when Y.dtype is + float32. """ return @@ -420,8 +422,8 @@ cdef class BaseDistancesReduction{{name_suffix}}: ) nogil: """Initialize datastructures used in a thread given its number. - EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ - in this method call: + In this method, EuclideanDistance specialisations of subclass of + BaseDistancesReduction _must_ call: self.gemm_term_computer._parallel_on_Y_parallel_init( thread_num, X_start, X_end, @@ -443,8 +445,8 @@ cdef class BaseDistancesReduction{{name_suffix}}: ) nogil: """Initialize datastructures just before the _compute_and_reduce_distances_on_chunks. - EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ - in this method call: + In this method, EuclideanDistance specialisations of subclass of + BaseDistancesReduction _must_ call: self.gemm_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num, From c13c0a15b460c70f5812243c9c098ebda6d12193 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 13 Oct 2022 18:17:34 +0200 Subject: [PATCH 6/6] Update sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp Co-authored-by: Olivier Grisel --- sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index e9e2c1c58503c..d03c7e5fa0b2a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -455,7 +455,7 @@ cdef class BaseDistancesReduction{{name_suffix}}: to ensure the proper upcast of Y[Y_start:Y_end] to float64 prior to the reduction with float64 accumulator buffers when Y.dtype is float32. - """"" + """ return cdef void _parallel_on_Y_synchronize(