From b5142a23c45edcd7cc26935aefff3d4197fe5929 Mon Sep 17 00:00:00 2001 From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:50:26 +0000 Subject: [PATCH 1/4] efficiency improvement --- sklearn/preprocessing/_discretization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 22fa236f3314e..526aa74d31007 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -205,7 +205,8 @@ def fit(self, X, y=None): init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5 # 1D k-means procedure - km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1) + km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, + algorithm='full') centers = km.fit(column[:, None]).cluster_centers_[:, 0] # Must sort, centers may be unsorted even with sorted init centers.sort() From fcd35aca545eb9c75305149f36574e23ecbaac7e Mon Sep 17 00:00:00 2001 From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:55:20 +0000 Subject: [PATCH 2/4] update doc --- doc/whats_new/v1.0.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index a78cbe69b746d..c7e9cd4067e86 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -385,6 +385,10 @@ Changelog - |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` is not supporting sparse matrix and raise the appropriate error message. :pr:`19879` by :user:`Guillaume Lemaitre <glemaitre>`. + +- |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in + :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. + :pr:`19934` by :user:`Gleb Levitskiy <GLevV>`. :mod:`sklearn.tree` ................... 
From 21727b578da9f970b5cee9fe5f68fc6cff8e8420 Mon Sep 17 00:00:00 2001 From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:58:36 +0000 Subject: [PATCH 3/4] lint --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index c7e9cd4067e86..8a2351b04ecc2 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -385,7 +385,7 @@ Changelog - |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` is not supporting sparse matrix and raise the appropriate error message. :pr:`19879` by :user:`Guillaume Lemaitre <glemaitre>`. - + - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. :pr:`19934` by :user:`Gleb Levitskiy <GLevV>`. From 96eeef14e56d01aafd7d6058ed2e0d7648a9b05e Mon Sep 17 00:00:00 2001 From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:59:09 +0000 Subject: [PATCH 4/4] lint --- sklearn/preprocessing/_discretization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 526aa74d31007..9ce95a97544a5 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -205,7 +205,7 @@ def fit(self, X, y=None): init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5 # 1D k-means procedure - km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, + km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, algorithm='full') centers = km.fit(column[:, None]).cluster_centers_[:, 0] # Must sort, centers may be unsorted even with sorted init