From 4f4a362f1a67adbea05f161d36a673223a2fdb04 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sun, 29 Sep 2024 19:23:12 +0200 Subject: [PATCH] DOC remove color quantization K-means example --- doc/conf.py | 3 + doc/datasets/loading_other_datasets.rst | 17 ++-- doc/modules/clustering.rst | 5 -- examples/cluster/plot_color_quantization.py | 93 --------------------- sklearn/cluster/_kmeans.py | 3 - 5 files changed, 14 insertions(+), 107 deletions(-) delete mode 100644 examples/cluster/plot_color_quantization.py diff --git a/doc/conf.py b/doc/conf.py index ea81e0772a2f2..7cd59168ec4bc 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -472,6 +472,9 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/linear_model/plot_lasso_coordinate_descent_path.py": ( "auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path.py" ), + "auto_examples/cluster/plot_color_quantization": ( + "auto_examples/cluster/plot_face_compress" + ), } html_context["redirects"] = redirects for old_link in redirects: diff --git a/doc/datasets/loading_other_datasets.rst b/doc/datasets/loading_other_datasets.rst index 004aa66c001e5..410aaee68c0f3 100644 --- a/doc/datasets/loading_other_datasets.rst +++ b/doc/datasets/loading_other_datasets.rst @@ -19,11 +19,20 @@ and pipelines on 2D data. load_sample_images load_sample_image -.. image:: ../auto_examples/cluster/images/sphx_glr_plot_color_quantization_001.png - :target: ../auto_examples/cluster/plot_color_quantization.html +.. plot:: + :context: close-figs :scale: 30 :align: right + :include-source: False + import matplotlib.pyplot as plt + from sklearn.datasets import load_sample_image + + china = load_sample_image("china.jpg") + plt.imshow(china) + plt.axis('off') + plt.tight_layout() + plt.show() .. warning:: @@ -33,10 +42,6 @@ and pipelines on 2D data. if you plan to use ``matplotlib.pyplpt.imshow``, don't forget to scale to the range 0 - 1 as done in the following example. -.. rubric:: Examples - -* :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py` - .. _libsvm_loader: Datasets in svmlight / libsvm format diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 3a055abb65c8b..863c68f72b588 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -236,11 +236,6 @@ computing cluster centers and values of inertia. For example, assigning a weight of 2 to a sample is equivalent to adding a duplicate of that sample to the dataset :math:`X`. -K-means can be used for vector quantization. This is achieved using the -``transform`` method of a trained model of :class:`KMeans`. For an example of -performing vector quantization on an image refer to -:ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`. - .. rubric:: Examples * :ref:`sphx_glr_auto_examples_cluster_plot_cluster_iris.py`: Example usage of diff --git a/examples/cluster/plot_color_quantization.py b/examples/cluster/plot_color_quantization.py deleted file mode 100644 index bd1958d3cf145..0000000000000 --- a/examples/cluster/plot_color_quantization.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -================================== -Color Quantization using K-Means -================================== - -Performs a pixel-wise Vector Quantization (VQ) of an image of the summer palace -(China), reducing the number of colors required to show the image from 96,615 -unique colors to 64, while preserving the overall appearance quality. - -In this example, pixels are represented in a 3D-space and K-means is used to -find 64 color clusters. In the image processing literature, the codebook -obtained from K-means (the cluster centers) is called the color palette. Using -a single byte, up to 256 colors can be addressed, whereas an RGB encoding -requires 3 bytes per pixel. The GIF file format, for example, uses such a -palette. - -For comparison, a quantized image using a random codebook (colors picked up -randomly) is also shown. - -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -from time import time - -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.cluster import KMeans -from sklearn.datasets import load_sample_image -from sklearn.metrics import pairwise_distances_argmin -from sklearn.utils import shuffle - -n_colors = 64 - -# Load the Summer Palace photo -china = load_sample_image("china.jpg") - -# Convert to floats instead of the default 8 bits integer coding. Dividing by -# 255 is important so that plt.imshow works well on float data (need to -# be in the range [0-1]) -china = np.array(china, dtype=np.float64) / 255 - -# Load Image and transform to a 2D numpy array. -w, h, d = original_shape = tuple(china.shape) -assert d == 3 -image_array = np.reshape(china, (w * h, d)) - -print("Fitting model on a small sub-sample of the data") -t0 = time() -image_array_sample = shuffle(image_array, random_state=0, n_samples=1_000) -kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(image_array_sample) -print(f"done in {time() - t0:0.3f}s.") - -# Get labels for all points -print("Predicting color indices on the full image (k-means)") -t0 = time() -labels = kmeans.predict(image_array) -print(f"done in {time() - t0:0.3f}s.") - - -codebook_random = shuffle(image_array, random_state=0, n_samples=n_colors) -print("Predicting color indices on the full image (random)") -t0 = time() -labels_random = pairwise_distances_argmin(codebook_random, image_array, axis=0) -print(f"done in {time() - t0:0.3f}s.") - - -def recreate_image(codebook, labels, w, h): - """Recreate the (compressed) image from the code book & labels""" - return codebook[labels].reshape(w, h, -1) - - -# Display all results, alongside original image -plt.figure(1) -plt.clf() -plt.axis("off") -plt.title("Original image (96,615 colors)") -plt.imshow(china) - -plt.figure(2) -plt.clf() -plt.axis("off") -plt.title(f"Quantized image ({n_colors} colors, K-Means)") -plt.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) - -plt.figure(3) -plt.clf() -plt.axis("off") -plt.title(f"Quantized image ({n_colors} colors, Random)") -plt.imshow(recreate_image(codebook_random, labels_random, w, h)) -plt.show() diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index ef7b910e17cb8..23f36cbaee212 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1362,9 +1362,6 @@ class KMeans(_BaseKMeans): For examples of common problems with K-Means and how to address them see :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`. - For an example of how to use K-Means to perform color quantization see - :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`. - For a demonstration of how K-Means can be used to cluster text documents see :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`.