Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

MAINT PairwiseDistancesReduction: Update comments and remove unused symbols #24625

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 13, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}):
ITYPE_t X_end,
) nogil:
cdef:
ITYPE_t idx, jdx
ITYPE_t idx

# Sorting the portion of the main heaps associated with `X[X_start:X_end]`
# in ascending order w.r.t. the distances.
Expand Down Expand Up @@ -287,7 +287,6 @@ cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}):
cdef void compute_exact_distances(self) nogil:
cdef:
ITYPE_t i, j
ITYPE_t[:, ::1] Y_indices = self.argkmin_indices
DTYPE_t[:, ::1] distances = self.argkmin_distances
for i in prange(self.n_samples_X, schedule='static', nogil=True,
num_threads=self.effective_n_threads):
Expand Down Expand Up @@ -432,7 +431,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}):
cdef void _parallel_on_Y_init(
self,
) nogil:
cdef ITYPE_t thread_num
ArgKmin{{name_suffix}}._parallel_on_Y_init(self)
self.gemm_term_computer._parallel_on_Y_init()

Expand Down
2 changes: 1 addition & 1 deletion sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ from numbers import Integral
from sklearn import get_config
from sklearn.utils import check_scalar
from ...utils._openmp_helpers import _openmp_effective_n_threads
from ...utils._typedefs import ITYPE, DTYPE
from ...utils._typedefs import DTYPE

cnp.import_array()

Expand Down
44 changes: 16 additions & 28 deletions sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ class BaseDistanceReductionDispatcher:
@classmethod
def valid_metrics(cls) -> List[str]:
excluded = {
"pyfunc", # is relatively slow because we need to coerce data as np arrays
# PyFunc cannot be supported because it necessitates interacting with
# the CPython interpreter to call user-defined functions.
"pyfunc",
"mahalanobis", # is numerically unstable
# TODO: In order to support discrete distance metrics, we need to have a
# stable simultaneous sort which preserves the order of the input.
# The best might be using std::stable_sort and a Comparator taking an
# Arrays of Structures instead of Structure of Arrays (currently used).
# In order to support discrete distance metrics, we need to have a
# stable simultaneous sort which preserves the order of the indices
# because there are generally many occurrences of a given distance
# value in this case.
# TODO: implement a stable simultaneous_sort.
"hamming",
*BOOL_METRICS,
}
Expand Down Expand Up @@ -241,8 +244,6 @@ def compute(
'parallel_on_X' is usually the most efficient strategy.
When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'
brings more opportunity for parallelism and is therefore more efficient
despite the synchronization step at each iteration of the outer loop
on chunks of `X`.

- None (default) looks-up in scikit-learn configuration for
`pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.
Expand All @@ -265,20 +266,14 @@ def compute(

Notes
-----
This classmethod is responsible for introspecting the arguments
values to dispatch to the most appropriate implementation of
:class:`ArgKmin64`.
This classmethod inspects the argument values to dispatch to the
dtype-specialized implementation of :class:`ArgKmin`.

This allows decoupling the API entirely from the implementation details
whilst maintaining RAII: all temporarily allocated datastructures necessary
for the concrete implementation are therefore freed when this classmethod
returns.
"""
# Note (jjerphan): Some design thoughts for future extensions.
# This factory handles specialisations for the given arguments.
# In future work, this could be an entrypoint to specialise operations
# for various backends and/or hardware and/or datatypes, and/or fused
# {sparse, dense}-datasetspair, etc.
if X.dtype == Y.dtype == np.float64:
return ArgKmin64.compute(
X=X,
Expand Down Expand Up @@ -415,21 +410,14 @@ def compute(

Notes
-----
This public classmethod is responsible for introspecting the arguments
values to dispatch to the private dtype-specialized implementation of
:class:`RadiusNeighbors64`.
This classmethod inspects the argument values to dispatch to the
dtype-specialized implementation of :class:`RadiusNeighbors`.

All temporarily allocated datastructures necessary for the concrete
implementation are therefore freed when this classmethod returns.

This allows decoupling the API entirely from the
implementation details whilst maintaining RAII.
This allows decoupling the API entirely from the implementation details
whilst maintaining RAII: all temporarily allocated datastructures necessary
for the concrete implementation are therefore freed when this classmethod
returns.
"""
# Note (jjerphan): Some design thoughts for future extensions.
# This factory handles specialisations for the given arguments.
# In future work, this could be an entrypoint to specialise operations
# for various backends and/or hardware and/or datatypes, and/or fused
# {sparse, dense}-datasetspair, etc.
if X.dtype == Y.dtype == np.float64:
return RadiusNeighbors64.compute(
X=X,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ from ...utils._typedefs cimport DTYPE_t, ITYPE_t
from ...utils._cython_blas cimport (
BLAS_Order,
BLAS_Trans,
ColMajor,
NoTrans,
RowMajor,
Trans,
Expand Down Expand Up @@ -176,8 +175,6 @@ cdef class GEMMTermComputer{{name_suffix}}:
ITYPE_t thread_num,
) nogil:
cdef:
ITYPE_t i, j
DTYPE_t squared_dist_i_j
const {{INPUT_DTYPE_t}}[:, ::1] X_c = self.X[X_start:X_end, :]
const {{INPUT_DTYPE_t}}[:, ::1] Y_c = self.Y[Y_start:Y_end, :]
DTYPE_t *dist_middle_terms = self.dist_middle_terms_chunks[thread_num].data()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}):
ITYPE_t X_end,
) nogil:
cdef:
ITYPE_t idx, jdx
ITYPE_t idx

# Sorting neighbors for each query vector of X
if self.sort_results:
Expand Down Expand Up @@ -279,7 +279,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}):
self,
) nogil:
cdef:
ITYPE_t idx, jdx, thread_num, idx_n_element, idx_current
ITYPE_t idx

with nogil, parallel(num_threads=self.effective_n_threads):
# Merge vectors used in threads into the main ones.
Expand Down Expand Up @@ -439,7 +439,6 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix}
cdef void _parallel_on_Y_init(
self,
) nogil:
cdef ITYPE_t thread_num
RadiusNeighbors{{name_suffix}}._parallel_on_Y_init(self)
self.gemm_term_computer._parallel_on_Y_init()

Expand Down