Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e405505

Browse files
jeremiedbb authored and NicolasHug committed
Fix empty clusters not correctly relocated when using sample_weight (#13486)
1 parent a32d974 commit e405505

File tree

3 files changed

+20
-1
lines changed

3 files changed

+20
-1
lines changed

doc/whats_new/v0.21.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ Support for Python 3.4 and below has been officially dropped.
9090
``n_connected_components_``.
9191
:issue:`13427` by :user:`Stephane Couvreur <scouvreur>`.
9292

93+
- |Fix| Fixed a bug in :class:`KMeans` where empty clusters weren't correctly
94+
relocated when using sample weights. :issue:`13486`
95+
by :user:`Jérémie du Boisberranger <jeremiedbb>`.
96+
9397
:mod:`sklearn.datasets`
9498
.......................
9599

sklearn/cluster/_k_means.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def _centers_dense(np.ndarray[floating, ndim=2] X,
309309
for i, cluster_id in enumerate(empty_clusters):
310310
# XXX two relocated clusters could be close to each other
311311
far_index = far_from_centers[i]
312-
new_center = X[far_index]
312+
new_center = X[far_index] * sample_weight[far_index]
313313
centers[cluster_id] = new_center
314314
weight_in_cluster[cluster_id] = sample_weight[far_index]
315315

sklearn/cluster/tests/test_k_means.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from sklearn.utils.testing import assert_equal
1010
from sklearn.utils.testing import assert_array_equal
1111
from sklearn.utils.testing import assert_array_almost_equal
12+
from sklearn.utils.testing import assert_allclose
1213
from sklearn.utils.testing import assert_almost_equal
1314
from sklearn.utils.testing import assert_raises
1415
from sklearn.utils.testing import assert_raises_regex
@@ -922,3 +923,17 @@ def test_iter_attribute():
922923
estimator = KMeans(algorithm="elkan", max_iter=1)
923924
estimator.fit(np.random.rand(10, 10))
924925
assert estimator.n_iter_ == 1
926+
927+
928+
def test_k_means_empty_cluster_relocated():
929+
# check that empty clusters are correctly relocated when using sample
930+
# weights (#13486)
931+
X = np.array([[-1], [1]])
932+
sample_weight = [1.9, 0.1]
933+
init = np.array([[-1], [10]])
934+
935+
km = KMeans(n_clusters=2, init=init, n_init=1)
936+
km.fit(X, sample_weight=sample_weight)
937+
938+
assert len(set(km.labels_)) == 2
939+
assert_allclose(km.cluster_centers_, [[-1], [1]])

0 commit comments

Comments
 (0)