From 9807ef05c26b3474409b1f5eada4fb2e569a9b95 Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 26 Oct 2017 14:08:26 -0400
Subject: [PATCH 01/16] faster way of computing means across each group

---
 sklearn/cluster/_feature_agglomeration.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index c6daf4540ef27..3257011844d4f 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -40,13 +40,20 @@ def transform(self, X):
 
         pooling_func = self.pooling_func
         X = check_array(X)
+        size = np.bincount(self.labels_)
+        n_samples = X.shape[0]
         nX = []
         if len(self.labels_) != X.shape[1]:
             raise ValueError("X has a different number of features than "
                              "during fitting.")
 
-        for l in np.unique(self.labels_):
-            nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
+        if pooling_func == np.mean:
+            # a fast way to compute the mean of grouped features
+            nX = np.array([np.bincount(self.labels_, X[i, :])]/size for i in range(n_samples))
+        else:
+            for l in np.unique(self.labels_):
+                nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
+            nX = np.array(X).T
         return np.array(nX).T
 
     def inverse_transform(self, Xred):

From dfcd4227124230cb841c3d9266b899cacd25fcc0 Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 26 Oct 2017 16:23:15 -0400
Subject: [PATCH 02/16] fixed bug

---
 sklearn/cluster/_feature_agglomeration.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 3257011844d4f..6bdd8f4ef9619 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -53,8 +53,8 @@ def transform(self, X):
         else:
             for l in np.unique(self.labels_):
                 nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
-            nX = np.array(X).T
-        return np.array(nX).T
+            nX = np.array(nX).T
+        return nX
 
     def inverse_transform(self, Xred):
         """

From b36422cce4779cecbcee209d1d7834349556d728 Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 26 Oct 2017 16:55:59 -0400
Subject: [PATCH 03/16] fixed the bug with parenthesis

---
 sklearn/cluster/_feature_agglomeration.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 6bdd8f4ef9619..e5b3977825d01 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -46,10 +46,9 @@ def transform(self, X):
         if len(self.labels_) != X.shape[1]:
             raise ValueError("X has a different number of features than "
                              "during fitting.")
-
         if pooling_func == np.mean:
             # a fast way to compute the mean of grouped features
-            nX = np.array([np.bincount(self.labels_, X[i, :])]/size for i in range(n_samples))
+            nX = np.array([np.bincount(self.labels_, X[i, :])/size for i in range(n_samples)])
         else:
             for l in np.unique(self.labels_):
                 nX.append(pooling_func(X[:, self.labels_ == l], axis=1))

From 534f757a5f530ac761ff513ad7d9dcbd31c08888 Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 26 Oct 2017 18:03:43 -0400
Subject: [PATCH 04/16] fixed PEP8 issues

---
 sklearn/cluster/_feature_agglomeration.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index e5b3977825d01..696c0dec60d76 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -14,6 +14,7 @@
 ###############################################################################
 # Mixin class for feature agglomeration.
 
+
 class AgglomerationTransform(TransformerMixin):
     """
     A class for feature agglomeration via the transform interface
@@ -48,7 +49,8 @@ def transform(self, X):
                              "during fitting.")
         if pooling_func == np.mean:
             # a fast way to compute the mean of grouped features
-            nX = np.array([np.bincount(self.labels_, X[i, :])/size for i in range(n_samples)])
+            nX = np.array([np.bincount(self.labels_, X[i, :])/size
+                           for i in range(n_samples)])
         else:
             for l in np.unique(self.labels_):
                 nX.append(pooling_func(X[:, self.labels_ == l], axis=1))

From f37287a36eb4c0c7eb21150f539248d032115b17 Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 26 Oct 2017 19:05:20 -0400
Subject: [PATCH 05/16] Benchmarking np.bincount vs np.mean for feature agglomeration

---
 benchmarks/bench_feature_agglomeration.py | 56 +++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 benchmarks/bench_feature_agglomeration.py

diff --git a/benchmarks/bench_feature_agglomeration.py b/benchmarks/bench_feature_agglomeration.py
new file mode 100644
index 0000000000000..a289cb2f73ad3
--- /dev/null
+++ b/benchmarks/bench_feature_agglomeration.py
@@ -0,0 +1,56 @@
+"""
+Benchmarks np.bincount method vs np.mean for feature agglomeration in
+../sklearn/cluster/_feature_agglomeration. Use of np.bincount provides
+a significant speed up if the pooling function is np.mean.
+
+np.bincount performs better especially as the size of X and n_clusters
+increase.
+"""
+
+import numpy as np
+from sklearn.feature_extraction.image import grid_to_graph
+from sklearn.cluster import FeatureAgglomeration
+import time
+
+def fit_agglomeration():
+    rng = np.random.RandomState(0)
+    X = rng.randn(50, 1000)
+    agglo = FeatureAgglomeration(n_clusters=500)
+    agglo.fit(X)
+    return X, agglo
+
+def get_transformed_array(X, agglo, method):
+    size = np.bincount(agglo.labels_)
+    n_samples = X.shape[0]
+    nX = []
+    if len(agglo.labels_) != X.shape[1]:
+        raise ValueError("X has a different number of features than "
+                         "during fitting.")
+    if method == "bincount":
+        # a fast way to compute the mean of grouped features
+        nX = np.array([np.bincount(agglo.labels_, X[i, :]) / size
+                       for i in range(n_samples)])
+    elif method == "np_mean":
+        for l in np.unique(agglo.labels_):
+            nX.append(np.mean(X[:, agglo.labels_ == l], axis=1))
+        nX = np.array(nX).T
+    else:
+        raise ValueError("Method can have a value of 'bincount' or 'np.mean'")
+    return nX
+
+if __name__ == "__main__":
+    X, agglo = fit_agglomeration()
+
+    tick = time.time()
+    result_bincount = get_transformed_array(X, agglo, "bincount")
+    time_bincount = time.time() - tick
+
+    tick = time.time()
+    result_np_mean = get_transformed_array(X, agglo, "np_mean")
+    time_np_mean = time.time() - tick
+
+    print('==================')
+    print('Took %s seconds using np.bincount' % (time_bincount))
+    print('Took %s seconds using np.mean' % (time_np_mean))
+    print('==================')
+    print("np.bincount is %s times faster" % (time_np_mean/time_bincount))
\ No newline at end of file

From 593c8ed799019e74f92fd9e163f269faaa9b3e3c Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 26 Oct 2017 19:31:29 -0400
Subject: [PATCH 06/16] fixed PEP8 issues for benchmark file

---
 benchmarks/bench_feature_agglomeration.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bench_feature_agglomeration.py b/benchmarks/bench_feature_agglomeration.py
index a289cb2f73ad3..c2d7aaad284a2 100644
--- a/benchmarks/bench_feature_agglomeration.py
+++ b/benchmarks/bench_feature_agglomeration.py
@@ -8,17 +8,18 @@
 """
 
 import numpy as np
-from sklearn.feature_extraction.image import grid_to_graph
 from sklearn.cluster import FeatureAgglomeration
 import time
 
+
 def fit_agglomeration():
     rng = np.random.RandomState(0)
-    X = rng.randn(50, 1000)
-    agglo = FeatureAgglomeration(n_clusters=500)
+    X = rng.randn(100000, 1000)
+    agglo = FeatureAgglomeration(n_clusters=5)
     agglo.fit(X)
     return X, agglo
 
+
 def get_transformed_array(X, agglo, method):
     size = np.bincount(agglo.labels_)
     n_samples = X.shape[0]
@@ -38,6 +39,7 @@ def get_transformed_array(X, agglo, method):
         raise ValueError("Method can have a value of 'bincount' or 'np.mean'")
     return nX
 
+
 if __name__ == "__main__":
     X, agglo = fit_agglomeration()
 
@@ -53,4 +55,4 @@ def get_transformed_array(X, agglo, method):
     print('==================')
     print('Took %s seconds using np.bincount' % (time_bincount))
     print('Took %s seconds using np.mean' % (time_np_mean))
     print('==================')
-    print("np.bincount is %s times faster" % (time_np_mean/time_bincount))
\ No newline at end of file
+    print("np.bincount is %s times faster" % (time_np_mean/time_bincount))

From 94ad19a7484c58387b1f628cc99ec1560257879c Mon Sep 17 00:00:00 2001
From: sergul
Date: Fri, 27 Oct 2017 17:31:50 -0400
Subject: [PATCH 07/16] removed pooling_func as it was deprecated

---
 sklearn/cluster/_feature_agglomeration.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 696c0dec60d76..43e88ec9f0a18 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -47,14 +47,9 @@ def transform(self, X):
         if len(self.labels_) != X.shape[1]:
             raise ValueError("X has a different number of features than "
                              "during fitting.")
-        if pooling_func == np.mean:
-            # a fast way to compute the mean of grouped features
-            nX = np.array([np.bincount(self.labels_, X[i, :])/size
-                           for i in range(n_samples)])
-        else:
-            for l in np.unique(self.labels_):
-                nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
-            nX = np.array(nX).T
+        # a fast way to compute the mean of grouped features
+        nX = np.array([np.bincount(self.labels_, X[i, :])/size
+                       for i in range(n_samples)])
         return nX
 
     def inverse_transform(self, Xred):

From 3defcd6ddcb288d353ef2eebcc495f7c5e809026 Mon Sep 17 00:00:00 2001
From: sergul
Date: Sun, 29 Oct 2017 14:40:37 -0400
Subject: [PATCH 08/16] test file added, benchmark removed

---
 benchmarks/bench_feature_agglomeration.py | 58 -------------------
 .../tests/test_feature_agglomeration.py   | 44 ++++++++++++++
 2 files changed, 44 insertions(+), 58 deletions(-)
 delete mode 100644 benchmarks/bench_feature_agglomeration.py
 create mode 100644 sklearn/cluster/tests/test_feature_agglomeration.py

diff --git a/benchmarks/bench_feature_agglomeration.py b/benchmarks/bench_feature_agglomeration.py
deleted file mode 100644
index c2d7aaad284a2..0000000000000
--- a/benchmarks/bench_feature_agglomeration.py
+++ /dev/null
@@ -1,58 +0,0 @@
-"""
-Benchmarks np.bincount method vs np.mean for feature agglomeration in
-../sklearn/cluster/_feature_agglomeration. Use of np.bincount provides
-a significant speed up if the pooling function is np.mean.
-
-np.bincount performs better especially as the size of X and n_clusters
-increase.
-"""
-
-import numpy as np
-from sklearn.cluster import FeatureAgglomeration
-import time
-
-
-def fit_agglomeration():
-    rng = np.random.RandomState(0)
-    X = rng.randn(100000, 1000)
-    agglo = FeatureAgglomeration(n_clusters=5)
-    agglo.fit(X)
-    return X, agglo
-
-
-def get_transformed_array(X, agglo, method):
-    size = np.bincount(agglo.labels_)
-    n_samples = X.shape[0]
-    nX = []
-    if len(agglo.labels_) != X.shape[1]:
-        raise ValueError("X has a different number of features than "
-                         "during fitting.")
-    if method == "bincount":
-        # a fast way to compute the mean of grouped features
-        nX = np.array([np.bincount(agglo.labels_, X[i, :]) / size
-                       for i in range(n_samples)])
-    elif method == "np_mean":
-        for l in np.unique(agglo.labels_):
-            nX.append(np.mean(X[:, agglo.labels_ == l], axis=1))
-        nX = np.array(nX).T
-    else:
-        raise ValueError("Method can have a value of 'bincount' or 'np.mean'")
-    return nX
-
-
-if __name__ == "__main__":
-    X, agglo = fit_agglomeration()
-
-    tick = time.time()
-    result_bincount = get_transformed_array(X, agglo, "bincount")
-    time_bincount = time.time() - tick
-
-    tick = time.time()
-    result_np_mean = get_transformed_array(X, agglo, "np_mean")
-    time_np_mean = time.time() - tick
-
-    print('==================')
-    print('Took %s seconds using np.bincount' % (time_bincount))
-    print('Took %s seconds using np.mean' % (time_np_mean))
-    print('==================')
-    print("np.bincount is %s times faster" % (time_np_mean/time_bincount))
diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
new file mode 100644
index 0000000000000..613bb0a28b665
--- /dev/null
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -0,0 +1,44 @@
+"""
+Tests for sklearn.cluster._feature_agglomeration
+"""
+# Authors: Sergul Aydore 2017
+import numpy as np
+from sklearn.cluster import FeatureAgglomeration
+from sklearn.utils.testing import assert_true
+from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_array_almost_equal
+
+
+def test_feature_agglomeration():
+    rng = np.random.RandomState(0)
+    n_samples, n_features, n_clusters = 10000, 1000, 200
+    X = rng.randn(n_samples, n_features)
+    agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
+                                      pooling_func=np.mean)
+    agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
+                                        pooling_func=np.median)
+    agglo_mean.fit(X)
+    agglo_median.fit(X)
+    assert_true(np.size(np.unique(agglo_mean.labels_)) == n_clusters)
+    assert_true(np.size(np.unique(agglo_median.labels_)) == n_clusters)
+
+    # Test transform
+    X_red_mean = agglo_mean.transform(X)
+    X_red_median = agglo_median.transform(X)
+    assert_true(X_red_mean.shape[1] == n_clusters)
+    assert_true(X_red_median.shape[1] == n_clusters)
+
+    # Check that fitting with no samples raises a ValueError
+    assert_raises(ValueError, agglo_mean.fit, X[:0])
+    assert_raises(ValueError, agglo_median.fit, X[:0])
+
+    # Test inverse transform
+    X_full_mean = agglo_mean.inverse_transform(X_red_mean)
+    X_full_median = agglo_mean.inverse_transform(X_red_median)
+    assert_true(np.unique(X_full_mean[0]).size == n_clusters)
+    assert_true(np.unique(X_full_median[0]).size == n_clusters)
+
+    assert_array_almost_equal(agglo_mean.transform(X_full_mean),
+                              X_red_mean)
+    assert_array_almost_equal(agglo_mean.transform(X_full_median),
+                              X_red_median)

From 7b5de282c9afdd4d94b9b028ff432399229c203c Mon Sep 17 00:00:00 2001
From: sergul
Date: Sun, 29 Oct 2017 14:42:28 -0400
Subject: [PATCH 09/16] special case was added back

---
 sklearn/cluster/_feature_agglomeration.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 43e88ec9f0a18..e248603151f96 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -41,15 +41,20 @@ def transform(self, X):
 
         pooling_func = self.pooling_func
         X = check_array(X)
-        size = np.bincount(self.labels_)
-        n_samples = X.shape[0]
-        nX = []
         if len(self.labels_) != X.shape[1]:
             raise ValueError("X has a different number of features than "
                              "during fitting.")
-        # a fast way to compute the mean of grouped features
-        nX = np.array([np.bincount(self.labels_, X[i, :])/size
-                       for i in range(n_samples)])
+        if pooling_func == np.mean:
+            size = np.bincount(self.labels_)
+            n_samples = X.shape[0]
+            # a fast way to compute the mean of grouped features
+            nX = np.array([np.bincount(self.labels_, X[i, :])/size
+                           for i in range(n_samples)])
+        else:
+            nX = []
+            for l in np.unique(self.labels_):
+                nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
+            nX = np.array(nX).T
         return nX
 
     def inverse_transform(self, Xred):

From 5818abfcd96684f45003af0f364b1cdc00a048a1 Mon Sep 17 00:00:00 2001
From: sergul
Date: Mon, 30 Oct 2017 13:08:32 -0400
Subject: [PATCH 10/16] after jnothman's review

---
 .../tests/test_feature_agglomeration.py | 28 ++++++++----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
index 613bb0a28b665..d12ac2f71d3cc 100644
--- a/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -10,9 +10,9 @@
 
 
 def test_feature_agglomeration():
-    rng = np.random.RandomState(0)
-    n_samples, n_features, n_clusters = 10000, 1000, 200
-    X = rng.randn(n_samples, n_features)
+    n_clusters = 1
+    X = np.array([0, 0, 1], ndmin=2)  # (n_samples, n_features)
+
     agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
                                       pooling_func=np.mean)
     agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
                                         pooling_func=np.median)
@@ -23,22 +23,18 @@ def test_feature_agglomeration():
     assert_true(np.size(np.unique(agglo_median.labels_)) == n_clusters)
 
     # Test transform
-    X_red_mean = agglo_mean.transform(X)
-    X_red_median = agglo_median.transform(X)
-    assert_true(X_red_mean.shape[1] == n_clusters)
-    assert_true(X_red_median.shape[1] == n_clusters)
-
-    # Check that fitting with no samples raises a ValueError
-    assert_raises(ValueError, agglo_mean.fit, X[:0])
-    assert_raises(ValueError, agglo_median.fit, X[:0])
+    Xt_mean = agglo_mean.transform(X)
+    Xt_median = agglo_median.transform(X)
+    assert_true(Xt_mean.shape[1] == n_clusters)
+    assert_true(Xt_median.shape[1] == n_clusters)
 
     # Test inverse transform
-    X_full_mean = agglo_mean.inverse_transform(X_red_mean)
-    X_full_median = agglo_mean.inverse_transform(X_red_median)
+    X_full_mean = agglo_mean.inverse_transform(Xt_mean)
+    X_full_median = agglo_median.inverse_transform(Xt_median)
     assert_true(np.unique(X_full_mean[0]).size == n_clusters)
     assert_true(np.unique(X_full_median[0]).size == n_clusters)
 
     assert_array_almost_equal(agglo_mean.transform(X_full_mean),
-                              X_red_mean)
-    assert_array_almost_equal(agglo_mean.transform(X_full_median),
-                              X_red_median)
+                              Xt_mean)
+    assert_array_almost_equal(agglo_median.transform(X_full_median),
+                              Xt_median)

From ef786c6f7ddc4d2189ed2048a3ab3f53a000103e Mon Sep 17 00:00:00 2001
From: sergul
Date: Mon, 30 Oct 2017 22:32:57 -0400
Subject: [PATCH 11/16] flake8 error fixed

---
 sklearn/cluster/tests/test_feature_agglomeration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
index d12ac2f71d3cc..7ac2dcc8b97a6 100644
--- a/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -5,7 +5,6 @@
 import numpy as np
 from sklearn.cluster import FeatureAgglomeration
 from sklearn.utils.testing import assert_true
-from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_array_almost_equal
 
 

From fba7691b690984801acbbee87960d3cbda5aef26 Mon Sep 17 00:00:00 2001
From: sergul
Date: Wed, 1 Nov 2017 20:18:20 -0400
Subject: [PATCH 12/16] sparse condition added

---
 sklearn/cluster/_feature_agglomeration.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index e248603151f96..cd86ff0512e71 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -10,6 +10,7 @@
 from ..base import TransformerMixin
 from ..utils import check_array
 from ..utils.validation import check_is_fitted
+from scipy.sparse import issparse
 
 ###############################################################################
 # Mixin class for feature agglomeration.
@@ -44,7 +45,7 @@ def transform(self, X):
         if len(self.labels_) != X.shape[1]:
             raise ValueError("X has a different number of features than "
                              "during fitting.")
-        if pooling_func == np.mean:
+        if (pooling_func == np.mean) & (not issparse(X)):
             size = np.bincount(self.labels_)
             n_samples = X.shape[0]
             # a fast way to compute the mean of grouped features

From 2f9846c6a30b9adfb33939cc23d6dfb9392ae8fd Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 2 Nov 2017 12:32:36 -0400
Subject: [PATCH 13/16] fixed 'and' and parenthesis

---
 sklearn/cluster/_feature_agglomeration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index cd86ff0512e71..4cddb1d41c638 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -45,7 +45,7 @@ def transform(self, X):
         if len(self.labels_) != X.shape[1]:
             raise ValueError("X has a different number of features than "
                              "during fitting.")
-        if (pooling_func == np.mean) & (not issparse(X)):
+        if pooling_func == np.mean and not issparse(X):
             size = np.bincount(self.labels_)
             n_samples = X.shape[0]
             # a fast way to compute the mean of grouped features

From 72caa776a8a159669a9d62d3d1e334a54621c266 Mon Sep 17 00:00:00 2001
From: sergul
Date: Thu, 2 Nov 2017 19:30:43 -0400
Subject: [PATCH 14/16] more tests added, reshaped input

---
 sklearn/cluster/tests/test_feature_agglomeration.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
index 7ac2dcc8b97a6..4feedbffbe1d8 100644
--- a/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -10,7 +10,7 @@
 
 def test_feature_agglomeration():
     n_clusters = 1
-    X = np.array([0, 0, 1], ndmin=2)  # (n_samples, n_features)
+    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)
 
     agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
                                       pooling_func=np.mean)
@@ -20,12 +20,16 @@ def test_feature_agglomeration():
     agglo_median.fit(X)
     assert_true(np.size(np.unique(agglo_mean.labels_)) == n_clusters)
     assert_true(np.size(np.unique(agglo_median.labels_)) == n_clusters)
+    assert_true(np.size(agglo_mean.labels_) == X.shape[1])
+    assert_true(np.size(agglo_median.labels_) == X.shape[1])
 
     # Test transform
     Xt_mean = agglo_mean.transform(X)
     Xt_median = agglo_median.transform(X)
     assert_true(Xt_mean.shape[1] == n_clusters)
     assert_true(Xt_median.shape[1] == n_clusters)
+    assert_true(Xt_mean == np.array([1/3.]))
+    assert_true(Xt_median == np.array([0.]))
 
     # Test inverse transform
     X_full_mean = agglo_mean.inverse_transform(Xt_mean)
     X_full_median = agglo_median.inverse_transform(Xt_median)

From a35000b22dadcee507ad6137e3256cdd65ad8f58 Mon Sep 17 00:00:00 2001
From: sergul
Date: Sat, 4 Nov 2017 15:24:06 -0400
Subject: [PATCH 15/16] added spaces before and after div operator

---
 sklearn/cluster/tests/test_feature_agglomeration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
index 4feedbffbe1d8..98d5dfc4b72ca 100644
--- a/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -28,7 +28,7 @@ def test_feature_agglomeration():
     Xt_median = agglo_median.transform(X)
     assert_true(Xt_mean.shape[1] == n_clusters)
     assert_true(Xt_median.shape[1] == n_clusters)
-    assert_true(Xt_mean == np.array([1/3.]))
+    assert_true(Xt_mean == np.array([1 / 3.]))
     assert_true(Xt_median == np.array([0.]))
 
     # Test inverse transform
     X_full_mean = agglo_mean.inverse_transform(Xt_mean)

From 9684ebea5615c2e6e934f3a6df14d115dc4c88ca Mon Sep 17 00:00:00 2001
From: sergul
Date: Sun, 5 Nov 2017 17:47:29 -0500
Subject: [PATCH 16/16] added spaces before and after div op - 2

---
 sklearn/cluster/_feature_agglomeration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 4cddb1d41c638..b2b28497aedfa 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -49,7 +49,7 @@ def transform(self, X):
             size = np.bincount(self.labels_)
             n_samples = X.shape[0]
             # a fast way to compute the mean of grouped features
-            nX = np.array([np.bincount(self.labels_, X[i, :])/size
+            nX = np.array([np.bincount(self.labels_, X[i, :]) / size
                            for i in range(n_samples)])
         else:
            nX = []
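
Editor's note: the heart of this series is the weighted-bincount group mean used in transform(). Below is a minimal, standalone sketch of that trick, kept outside the patches; the array shapes, cluster count, and variable names (Xt_fast, Xt_naive) are illustrative only. It checks the fast path against the naive per-cluster np.mean pooling it replaces:

import numpy as np

# Illustrative sizes only; one integer cluster label per feature, chosen so
# that every cluster is non-empty.
n_samples, n_features, n_clusters = 20, 50, 5
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features)
labels = np.arange(n_features) % n_clusters

# Fast path: np.bincount(labels, weights=row) sums each row's values per
# cluster label; dividing by the cluster sizes turns the sums into means.
size = np.bincount(labels)
Xt_fast = np.array([np.bincount(labels, X[i, :]) / size
                    for i in range(n_samples)])

# Naive path: slice out each cluster's columns with a boolean mask and pool
# them with np.mean, as the pre-patch code did for every pooling_func.
Xt_naive = np.array([np.mean(X[:, labels == l], axis=1)
                     for l in range(n_clusters)]).T

assert np.allclose(Xt_fast, Xt_naive)  # both have shape (n_samples, n_clusters)

The gain comes from doing a single O(n_features) bincount pass per row instead of one boolean-mask slice per cluster. This is also why the patches only take the fast path when pooling_func is np.mean and X is not sparse: np.bincount needs a dense 1-D weights array, so sparse input falls back to the generic loop.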