Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

FEA CSR support for all DistanceMetric #23604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b8bd875
MAINT Implement CSR support for all DistanceMetric
jjerphan Jun 11, 2022
7b07188
Merge branch 'main' into maint/dist-metrics-csr-support
jjerphan Jun 14, 2022
fb99680
TST Remove useless guard
jjerphan Jun 15, 2022
d39d2b2
TST Skip JaccardDistance on 32bit architecture
jjerphan Jun 15, 2022
011e2a2
MAINT Define dtype alias for sparse matrices indices
jjerphan Jun 16, 2022
a579630
MAINT Do not shadow dtype names in Tempita templating
jjerphan Jun 16, 2022
98e9d21
fixup! MAINT Define dtype alias for sparse matrices indices
jjerphan Jun 16, 2022
8aa4e44
TST Use cdist and pdist appropriately
jjerphan Jun 16, 2022
9edfa11
DOC Improve comments
jjerphan Jun 17, 2022
ee5c6bf
Fixups
jjerphan Jun 17, 2022
bf5eb59
MAINT Wrap of indptr values to support sparse-dense
jjerphan Jun 17, 2022
92b8a6c
Apply review comments
jjerphan Jun 17, 2022
dc6f8cf
More interesting boolean data for tests
ogrisel Jun 17, 2022
bb06f59
FIX Various corrections
jjerphan Jun 17, 2022
a5eb20d
FIX Make Jaccard, Hamming and Hashing robust to explicit zeros
jjerphan Jun 17, 2022
19edf11
FIX Make the other boolean DistanceMetric also robust to explicit zeros
jjerphan Jun 17, 2022
de86802
TST Remove xfail for Jaccard on 32bit arch.
jjerphan Jun 17, 2022
bb920cf
Cast to np.float64_t where appropriate
jjerphan Jun 17, 2022
b3759fe
Rename methods and correctly format their signatures
jjerphan Jun 20, 2022
7f89236
fixup! TST Remove xfail for Jaccard on 32bit arch.
jjerphan Jun 20, 2022
01a0c33
FEA CSR support for HaversineDistance
jjerphan Jun 20, 2022
7d8a717
Fix typo
jjerphan Jun 22, 2022
563e359
Do not upcast to 64bit yet keep the same precision
jjerphan Jun 22, 2022
f863a51
Do use the default rtol
jjerphan Jun 22, 2022
5ba0fbe
Set rtol explicitly in test_distance_metrics_dtype_consistency
ogrisel Jun 22, 2022
4f45839
Implement the sparse-dense and the dense-sparse case for c-contiguity
jjerphan Jun 23, 2022
3e3e888
Add validation on X and Y, accepting CSR as inputs
jjerphan Jun 23, 2022
ddc49d5
Remove left-overs
jjerphan Jun 23, 2022
a83887c
Merge branch 'main' into maint/dist-metrics-csr-support
jjerphan Jun 24, 2022
731370a
DOC Motivate the signature for DistanceMetric.{dist_csr, rdist_csr}
jjerphan Jun 29, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 89 additions & 26 deletions sklearn/metrics/_dist_metrics.pxd.tp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
implementation_specific_values = [
# Values are the following ones:
#
# name_suffix, DTYPE_t, DTYPE
# name_suffix, INPUT_DTYPE_t, INPUT_DTYPE
#
# On the one hand, an empty string is used for `name_suffix`
# for the float64 case so as to still be able to expose the original
Expand All @@ -28,18 +28,18 @@ implementation_specific_values = [
cimport numpy as cnp
from libc.math cimport sqrt, exp

from ..utils._typedefs cimport DTYPE_t, ITYPE_t
from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t

{{for name_suffix, DTYPE_t, DTYPE in implementation_specific_values}}
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

######################################################################
# Inline distance functions
#
# We use these for the default (euclidean) case so that they can be
# inlined. This leads to faster computation for the most common case
cdef inline DTYPE_t euclidean_dist{{name_suffix}}(
const {{DTYPE_t}}* x1,
const {{DTYPE_t}}* x2,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
) nogil except -1:
cdef DTYPE_t tmp, d=0
Expand All @@ -51,8 +51,8 @@ cdef inline DTYPE_t euclidean_dist{{name_suffix}}(


cdef inline DTYPE_t euclidean_rdist{{name_suffix}}(
const {{DTYPE_t}}* x1,
const {{DTYPE_t}}* x2,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
) nogil except -1:
cdef DTYPE_t tmp, d=0
Expand All @@ -63,11 +63,11 @@ cdef inline DTYPE_t euclidean_rdist{{name_suffix}}(
return d


cdef inline DTYPE_t euclidean_dist_to_rdist{{name_suffix}}(const {{DTYPE_t}} dist) nogil except -1:
# Convert `dist` to its `rdist` counterpart (as the name indicates) by
# squaring it: the function returns `dist * dist`.
# The argument uses the templated {{INPUT_DTYPE_t}}, while the result is
# always returned as DTYPE_t.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef inline DTYPE_t euclidean_dist_to_rdist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) nogil except -1:
return dist * dist


cdef inline DTYPE_t euclidean_rdist_to_dist{{name_suffix}}(const {{DTYPE_t}} dist) nogil except -1:
# Convert an `rdist` value back to a `dist` value (as the name indicates) by
# taking its square root with libc's `sqrt`.
# NOTE: the parameter is named `dist` although, per the function name, it
# carries the rdist value.
# The argument uses the templated {{INPUT_DTYPE_t}}, while the result is
# always returned as DTYPE_t.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef inline DTYPE_t euclidean_rdist_to_dist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) nogil except -1:
return sqrt(dist)


Expand All @@ -78,26 +78,89 @@ cdef class DistanceMetric{{name_suffix}}:
# we must define them here so that cython's limited polymorphism will work.
# Because we don't expect to instantiate a lot of these objects, the
# extra memory overhead of this setup should not be an issue.
cdef {{DTYPE_t}} p
cdef {{DTYPE_t}}[::1] vec
cdef {{DTYPE_t}}[:, ::1] mat
# Attribute declarations for the class. `p`, `vec` and `mat` are typed with
# the fixed DTYPE_t alias rather than the templated {{INPUT_DTYPE_t}}, so
# their type is the same in every class generated by the template loop.
# `vec` is a 1-D C-contiguous memoryview, `mat` a 2-D C-contiguous one.
# NOTE(review): the role of each attribute (p, vec, mat, size, func, kwargs)
# is presumably defined by the concrete metrics in the implementation file,
# which is not visible here — confirm before relying on any of them.
cdef DTYPE_t p
cdef DTYPE_t[::1] vec
cdef DTYPE_t[:, ::1] mat
cdef ITYPE_t size
cdef object func
cdef object kwargs

cdef DTYPE_t dist(self, const {{DTYPE_t}}* x1, const {{DTYPE_t}}* x2,
ITYPE_t size) nogil except -1

cdef DTYPE_t rdist(self, const {{DTYPE_t}}* x1, const {{DTYPE_t}}* x2,
ITYPE_t size) nogil except -1

cdef int pdist(self, const {{DTYPE_t}}[:, ::1] X, {{DTYPE_t}}[:, ::1] D) except -1

cdef int cdist(self, const {{DTYPE_t}}[:, ::1] X, const {{DTYPE_t}}[:, ::1] Y,
{{DTYPE_t}}[:, ::1] D) except -1

cdef DTYPE_t _rdist_to_dist(self, {{DTYPE_t}} rdist) nogil except -1

cdef DTYPE_t _dist_to_rdist(self, {{DTYPE_t}} dist) nogil except -1
# Declaration of `dist` for two dense vectors passed as raw pointers.
#   x1, x2 : read-only pointers to values of the templated input dtype.
#   size   : ITYPE_t count; presumably the number of elements readable from
#            each pointer — confirm against the implementation.
# Returns a DTYPE_t value regardless of the input dtype.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef DTYPE_t dist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
) nogil except -1

# Declaration of `rdist`, which has the same signature as `dist`.
#   x1, x2 : read-only pointers to values of the templated input dtype.
#   size   : ITYPE_t count; presumably the number of elements readable from
#            each pointer — confirm against the implementation.
# Returns a DTYPE_t value regardless of the input dtype.
# NOTE(review): how the returned value relates to `dist` is defined by the
# implementation and the `_rdist_to_dist` / `_dist_to_rdist` pair; it is not
# visible in this declaration file.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef DTYPE_t rdist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
) nogil except -1

# Declaration of the CSR (sparse) counterpart of `dist`.
#   x1_data, x2_data       : read-only 1-D memoryviews of the templated input
#                            dtype.
#   x1_indices, x2_indices : read-only 1-D memoryviews of SPARSE_INDEX_TYPE_t.
#   x1_start, x1_end,
#   x2_start, x2_end       : SPARSE_INDEX_TYPE_t bounds; presumably the indptr
#                            values delimiting each vector's slice of the
#                            data/indices arrays — confirm against the
#                            implementation (including whether the end bound
#                            is exclusive).
#   size                   : ITYPE_t; presumably the number of features of
#                            the vectors — confirm.
# Returns a DTYPE_t value regardless of the input dtype.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef DTYPE_t dist_csr(
self,
const {{INPUT_DTYPE_t}}[:] x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const {{INPUT_DTYPE_t}}[:] x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t x1_start,
const SPARSE_INDEX_TYPE_t x1_end,
const SPARSE_INDEX_TYPE_t x2_start,
const SPARSE_INDEX_TYPE_t x2_end,
const ITYPE_t size,
) nogil except -1

# Declaration of the CSR (sparse) counterpart of `rdist`; the parameter list
# is identical to the one of `dist_csr`.
#   x1_data, x2_data       : read-only 1-D memoryviews of the templated input
#                            dtype.
#   x1_indices, x2_indices : read-only 1-D memoryviews of SPARSE_INDEX_TYPE_t.
#   x1_start, x1_end,
#   x2_start, x2_end       : SPARSE_INDEX_TYPE_t bounds; presumably the indptr
#                            values delimiting each vector's slice of the
#                            data/indices arrays — confirm against the
#                            implementation.
#   size                   : ITYPE_t; presumably the number of features of
#                            the vectors — confirm.
# Returns a DTYPE_t value regardless of the input dtype.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef DTYPE_t rdist_csr(
self,
const {{INPUT_DTYPE_t}}[:] x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const {{INPUT_DTYPE_t}}[:] x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t x1_start,
const SPARSE_INDEX_TYPE_t x1_end,
const SPARSE_INDEX_TYPE_t x2_start,
const SPARSE_INDEX_TYPE_t x2_end,
const ITYPE_t size,
) nogil except -1

# Declaration of `pdist` for a dense input.
#   X : read-only 2-D C-contiguous memoryview of the templated input dtype.
#   D : writable 2-D C-contiguous DTYPE_t memoryview; presumably the output
#       buffer receiving the pairwise distances between rows of X — confirm
#       against the implementation.
# Returns an int status; `except -1` reserves -1 as the error return value.
# Unlike the `*_csr` variants, this declaration is not marked `nogil`.
cdef int pdist(
self,
const {{INPUT_DTYPE_t}}[:, ::1] X,
DTYPE_t[:, ::1] D,
) except -1

# Declaration of `cdist` for two dense inputs.
#   X, Y : read-only 2-D C-contiguous memoryviews of the templated input
#          dtype.
#   D    : writable 2-D C-contiguous DTYPE_t memoryview; presumably the output
#          buffer receiving the distances between rows of X and rows of Y —
#          confirm against the implementation.
# Returns an int status; `except -1` reserves -1 as the error return value.
# Unlike the `*_csr` variants, this declaration is not marked `nogil`.
cdef int cdist(
self,
const {{INPUT_DTYPE_t}}[:, ::1] X,
const {{INPUT_DTYPE_t}}[:, ::1] Y,
DTYPE_t[:, ::1] D,
) except -1

# Declaration of the CSR (sparse) counterpart of `pdist`.
#   x1_data    : read-only 1-D memoryview of the templated input dtype.
#   x1_indices : read-only 1-D memoryview of SPARSE_INDEX_TYPE_t.
#   x1_indptr  : read-only 1-D memoryview of SPARSE_INDEX_TYPE_t.
#                Presumably these three are the data/indices/indptr arrays of
#                one CSR matrix — confirm against the caller.
#   size       : ITYPE_t; presumably the number of features — confirm.
#   D          : writable 2-D C-contiguous DTYPE_t memoryview; presumably the
#                output buffer for the pairwise distances — confirm.
# Returns an int status.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef int pdist_csr(
self,
const {{INPUT_DTYPE_t}}[:] x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const SPARSE_INDEX_TYPE_t[:] x1_indptr,
const ITYPE_t size,
DTYPE_t[:, ::1] D,
) nogil except -1

# Declaration of the CSR (sparse) counterpart of `cdist`.
#   x1_data, x2_data       : read-only 1-D memoryviews of the templated input
#                            dtype.
#   x1_indices, x2_indices : read-only 1-D memoryviews of SPARSE_INDEX_TYPE_t.
#   x1_indptr, x2_indptr   : read-only 1-D memoryviews of SPARSE_INDEX_TYPE_t.
#                            Presumably each (data, indices, indptr) triple
#                            describes one CSR matrix — confirm against the
#                            caller.
#   size                   : ITYPE_t; presumably the number of features shared
#                            by both matrices — confirm.
#   D                      : writable 2-D C-contiguous DTYPE_t memoryview;
#                            presumably the output buffer for the distances
#                            between rows of the two matrices — confirm.
# Returns an int status.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef int cdist_csr(
self,
const {{INPUT_DTYPE_t}}[:] x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const SPARSE_INDEX_TYPE_t[:] x1_indptr,
const {{INPUT_DTYPE_t}}[:] x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t[:] x2_indptr,
const ITYPE_t size,
DTYPE_t[:, ::1] D,
) nogil except -1

# Declaration of the scalar conversion from an `rdist` value to a `dist`
# value (as the name indicates); the conversion itself lives in the
# implementation file, which is not visible here.
# The argument uses the templated {{INPUT_DTYPE_t}}; the result is DTYPE_t.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) nogil except -1

# Declaration of the scalar conversion from a `dist` value to an `rdist`
# value (as the name indicates); the conversion itself lives in the
# implementation file, which is not visible here.
# The argument uses the templated {{INPUT_DTYPE_t}}; the result is DTYPE_t.
# `nogil except -1`: callable without holding the GIL; -1 is reserved as the
# error return value.
cdef DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) nogil except -1

{{endfor}}
Loading