From 311ccec1e38227ef9c7ce7d271b2d3a1b29b90e0 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sun, 15 Sep 2024 22:30:30 +0200 Subject: [PATCH 1/3] add links to KMeans++ examples in docstrings and the user guide --- doc/modules/clustering.rst | 6 +++--- sklearn/cluster/_kmeans.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 3a055abb65c8b..66d4d27fd08b3 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -226,9 +226,9 @@ random initialization, as shown in the reference. For a detailed example of comaparing different initialization schemes, refer to :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`. -K-means++ can also be called independently to select seeds for other -clustering algorithms, see :func:`sklearn.cluster.kmeans_plusplus` for details -and example usage. +K-means++ can also be called independently to select seeds for other clustering +algorithms, see:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py` +for details and example usage. The algorithm supports sample weights, which can be given by a parameter ``sample_weight``. This allows to assign more weight to some samples when diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index ef7b910e17cb8..5aae0c98caa72 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1370,6 +1370,9 @@ class KMeans(_BaseKMeans): For a comparison between K-Means and MiniBatchKMeans refer to example :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`. + + For a demonstration of how K-Means can be used for generating initial seeds + for clustering :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py`. """ _parameter_constraints: dict = { From de61275815cd327a7bcc50dbbc4efb874b333fbf Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 21 Oct 2024 21:07:11 +0200 Subject: [PATCH 2/3] move the example to the list of examples in clustering.rst and remove the example from KMeans docs --- doc/modules/clustering.rst | 9 ++++++--- sklearn/cluster/_kmeans.py | 6 ------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 88f904ab760d7..f5aae039e7648 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -226,9 +226,9 @@ random initialization, as shown in the reference. For a detailed example of comaparing different initialization schemes, refer to :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`. -K-means++ can also be called independently to select seeds for other clustering -algorithms, see:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py` -for details and example usage. +K-means++ can also be called independently to select seeds for other +clustering algorithms, see :func:`sklearn.cluster.kmeans_plusplus` for details +and example usage. The algorithm supports sample weights, which can be given by a parameter ``sample_weight``. This allows to assign more weight to some samples when @@ -241,6 +241,9 @@ to the dataset :math:`X`. * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py`: Using K-means++ +to select seeds for other clustering algorithms. + Low-level parallelism --------------------- diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index bc926c07932a2..80958f8c845a2 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1366,14 +1366,8 @@ class KMeans(_BaseKMeans): For a comparison between K-Means and MiniBatchKMeans refer to example :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`. - For a demonstration of how K-Means can be used for generating initial seeds - for clustering :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py`. - For a comparison between K-Means and BisectingKMeans refer to example :ref:`sphx_glr_auto_examples_cluster_plot_bisect_kmeans.py`. - - For a demonstration of how K-Means can be used for generating initial seeds - for clustering :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py`. """ _parameter_constraints: dict = { From 00d67dcd9137fcce06f895232204296444fdd66f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 25 Oct 2024 14:52:12 +0200 Subject: [PATCH 3/3] correct the indentation --- doc/modules/clustering.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index f5aae039e7648..7cf593baf20d1 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -242,7 +242,7 @@ to the dataset :math:`X`. using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py`: Using K-means++ -to select seeds for other clustering algorithms. + to select seeds for other clustering algorithms. Low-level parallelism ---------------------