diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index c572acf49370c..fdb48fce032f5 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -27,6 +27,13 @@ Changes impacting all modules Changelog --------- +:mod:`sklearn.cluster` +...................... + +- |Fix| Fixed a bug in :class:`cluster.BisectingKMeans`, preventing `fit` from randomly + failing due to a permutation of the labels when running multiple inits. + :pr:`25563` by :user:`Jérémie du Boisberranger <jeremiedbb>`. + :mod:`sklearn.isotonic` ....................... diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index 277d88b1d1109..b860596c03540 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -337,7 +337,9 @@ def _bisect(self, X, x_squared_norms, sample_weight, cluster_to_bisect): X, best_centers, best_labels, sample_weight ) else: # bisecting_strategy == "largest_cluster" - scores = np.bincount(best_labels) + # Using minlength to make sure that we have the counts for both labels even + # if all samples are labelled 0. + scores = np.bincount(best_labels, minlength=2) cluster_to_bisect.split(best_labels, best_centers, scores)