From 2ca67a6e5de506f2c155ee2bf971405398615684 Mon Sep 17 00:00:00 2001 From: Micky774 Date: Thu, 26 May 2022 10:50:35 -0400 Subject: [PATCH 01/15] Reconciled changelog --- doc/whats_new/v1.1.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 5a5e3bc71bcc7..4c46c0d631f76 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -2,6 +2,23 @@ .. currentmodule:: sklearn +.. _changes_1_1_2: + +Version 1.1.2 +============= + +**In Development** + +Changelog +--------- + +:mod:`sklearn.cluster` +...................... + +- |Fix| Fixed a bug in :class:`cluster.Birch` that could trigger an error when splitting + a node if there are duplicates in the dataset. + :pr:`23395` by :user:`Jérémie du Boisberranger `. + .. _changes_1_1_1: Version 1.1.1 From 15024ac57e9ef0e5b2e9b4576184fd111060a391 Mon Sep 17 00:00:00 2001 From: Micky774 Date: Thu, 26 May 2022 11:13:11 -0400 Subject: [PATCH 02/15] Completed initial deprecation --- sklearn/cluster/_affinity_propagation.py | 51 ++++++++++++------- .../tests/test_affinity_propagation.py | 47 ++++++++++++----- 2 files changed, 69 insertions(+), 29 deletions(-) diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index f0274b113a341..3282724291761 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -302,10 +302,14 @@ class AffinityPropagation(ClusterMixin, BaseEstimator): they will be set to the median of the input similarities. affinity : {'euclidean', 'precomputed'}, default='euclidean' - Which affinity to use. At the moment 'precomputed' and - ``euclidean`` are supported. 'euclidean' uses the + Which metric to use. At the moment 'precomputed' and + 'euclidean' are supported. 'euclidean' uses the negative squared euclidean distance between points. + .. deprecated:: 1.2 + `affinity` was deprecated in version 1.2 and will be renamed to `metric` + in 1.4. 
+ verbose : bool, default=False Whether to be verbose. @@ -413,7 +417,8 @@ def __init__( convergence_iter=15, copy=True, preference=None, - affinity="euclidean", + affinity="deprecated", # TODO(1.4): Remove + metric=None, # TODO(1.4): Set default as "euclidean" verbose=False, random_state=None, ): @@ -425,21 +430,22 @@ def __init__( self.verbose = verbose self.preference = preference self.affinity = affinity + self.metric = metric self.random_state = random_state def _more_tags(self): - return {"pairwise": self.affinity == "precomputed"} + return {"pairwise": self.metric == "precomputed"} def fit(self, X, y=None): - """Fit the clustering from features, or affinity matrix. + """Fit the clustering from features, or distance matrix. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features), or \ array-like of shape (n_samples, n_samples) - Training instances to cluster, or similarities / affinities between - instances if ``affinity='precomputed'``. If a sparse feature matrix - is provided, it will be converted into a sparse ``csr_matrix``. + Training instances to cluster, or distances between instances if + ``metric='precomputed'``. If a sparse feature matrix is provided, + it will be converted into a sparse ``csr_matrix``. y : Ignored Not used, present here for API consistency by convention. @@ -449,19 +455,30 @@ def fit(self, X, y=None): self Returns the instance itself. """ - if self.affinity == "precomputed": + # TODO(1.4): Remove + self._metric = self.metric or self.affinity + if self.affinity != "deprecated": + warnings.warn( + "Attribute `affinity` was deprecated in version 1.2 and will be removed" + " in 1.4. 
Use `metric` instead", + DeprecationWarning, + ) + elif self._metric == "deprecated": + self._metric = "euclidean" + + if self._metric == "precomputed": accept_sparse = False else: accept_sparse = "csr" X = self._validate_data(X, accept_sparse=accept_sparse) - if self.affinity == "precomputed": + if self._metric == "precomputed": self.affinity_matrix_ = X - elif self.affinity == "euclidean": + elif self._metric == "euclidean": self.affinity_matrix_ = -euclidean_distances(X, squared=True) else: raise ValueError( - "Affinity must be 'precomputed' or 'euclidean'. Got %s instead" - % str(self.affinity) + "metric must be 'precomputed' or 'euclidean'. Got %s instead" + % str(self._metric) ) check_scalar( @@ -496,7 +513,7 @@ def fit(self, X, y=None): random_state=self.random_state, ) - if self.affinity != "precomputed": + if self.metric != "precomputed": self.cluster_centers_ = X[self.cluster_centers_indices_].copy() return self @@ -519,7 +536,7 @@ def predict(self, X): X = self._validate_data(X, reset=False, accept_sparse="csr") if not hasattr(self, "cluster_centers_"): raise ValueError( - "Predict method is not supported when affinity='precomputed'." + "Predict method is not supported when metric='precomputed'." ) if self.cluster_centers_.shape[0] > 0: @@ -535,14 +552,14 @@ def predict(self, X): return np.array([-1] * X.shape[0]) def fit_predict(self, X, y=None): - """Fit clustering from features/affinity matrix; return cluster labels. + """Fit clustering from features/distance matrix; return cluster labels. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features), or \ array-like of shape (n_samples, n_samples) Training instances to cluster, or similarities / affinities between - instances if ``affinity='precomputed'``. If a sparse feature matrix + instances if ``metric='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``. 
y : Ignored diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index e5dc5d584266d..57012ab5b69b2 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -45,7 +45,7 @@ def test_affinity_propagation(): assert n_clusters == n_clusters_ af = AffinityPropagation( - preference=preference, affinity="precomputed", random_state=28 + preference=preference, metric="precomputed", random_state=28 ) labels_precomputed = af.fit(S).labels_ @@ -82,10 +82,11 @@ def test_affinity_propagation_affinity_shape(): (X, {"damping": 2}, ValueError, "damping == 2, must be < 1"), (X, {"max_iter": 0}, ValueError, "max_iter == 0, must be >= 1."), (X, {"convergence_iter": 0}, ValueError, "convergence_iter == 0, must be >= 1"), - (X, {"affinity": "unknown"}, ValueError, "Affinity must be"), + # TODO(1.4): Replace message "Affinity must be" with "metric must be" + (X, {"metric": "unknown"}, ValueError, "metric must be"), ( csr_matrix((3, 3)), - {"affinity": "precomputed"}, + {"metric": "precomputed"}, TypeError, "A sparse matrix was passed, but dense data is required", ), @@ -99,7 +100,7 @@ def test_affinity_propagation_params_validation(input, params, err_type, err_msg def test_affinity_propagation_predict(): # Test AffinityPropagation.predict - af = AffinityPropagation(affinity="euclidean", random_state=63) + af = AffinityPropagation(metric="euclidean", random_state=63) labels = af.fit_predict(X) labels2 = af.predict(X) assert_array_equal(labels, labels2) @@ -108,13 +109,13 @@ def test_affinity_propagation_predict(): def test_affinity_propagation_predict_error(): # Test exception in AffinityPropagation.predict # Not fitted. - af = AffinityPropagation(affinity="euclidean") + af = AffinityPropagation(metric="euclidean") with pytest.raises(ValueError): af.predict(X) # Predict not supported when affinity="precomputed". 
S = np.dot(X, X.T) - af = AffinityPropagation(affinity="precomputed", random_state=57) + af = AffinityPropagation(metric="precomputed", random_state=57) af.fit(S) with pytest.raises(ValueError): af.predict(X) @@ -186,7 +187,7 @@ def test_affinity_propagation_predict_non_convergence(): def test_affinity_propagation_non_convergence_regressiontest(): X = np.array([[1, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 1]]) - af = AffinityPropagation(affinity="euclidean", max_iter=2, random_state=34) + af = AffinityPropagation(metric="euclidean", max_iter=2, random_state=34) msg = ( "Affinity propagation did not converge, this model may return degenerate" " cluster centers and labels." @@ -258,9 +259,7 @@ def test_affinity_propagation_float32(): X = np.array( [[1, 0, 0, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 0, 0, 1]], dtype="float32" ) - afp = AffinityPropagation(preference=1, affinity="precomputed", random_state=0).fit( - X - ) + afp = AffinityPropagation(preference=1, metric="precomputed", random_state=0).fit(X) expected = np.array([0, 1, 1, 2]) assert_array_equal(afp.labels_, expected) @@ -268,7 +267,7 @@ def test_affinity_propagation_float32(): def test_sparse_input_for_predict(): # Test to make sure sparse inputs are accepted for predict # (non-regression test for issue #20049) - af = AffinityPropagation(affinity="euclidean", random_state=42) + af = AffinityPropagation(metric="euclidean", random_state=42) af.fit(X) labels = af.predict(csr_matrix((2, 2))) assert_array_equal(labels, (2, 2)) @@ -277,8 +276,32 @@ def test_sparse_input_for_predict(): def test_sparse_input_for_fit_predict(): # Test to make sure sparse inputs are accepted for fit_predict # (non-regression test for issue #20049) - af = AffinityPropagation(affinity="euclidean", random_state=42) + af = AffinityPropagation(metric="euclidean", random_state=42) rng = np.random.RandomState(42) X = csr_matrix(rng.randint(0, 2, size=(5, 5))) labels = af.fit_predict(X) assert_array_equal(labels, (0, 1, 1, 2, 3)) + 
+ +# TODO(1.4): Remove +def test_deprecate_affinity(): + est = [ + AffinityPropagation(random_state=42), + AffinityPropagation(metric="euclidean", random_state=42), + ] + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + for af in est: + af.fit(X) + af.predict(X) + af.fit_predict(X) + + af = AffinityPropagation(affinity="euclidean", random_state=42) + msg = ( + "Attribute `affinity` was deprecated in version 1.2 and will be removed in 1.4." + " Use `metric` instead" + ) + with pytest.warns(DeprecationWarning, match=msg): + af.fit(X) + af.predict(X) + af.fit_predict(X) From 59d7ce6675843a566a74d1019b48804d3b7708f9 Mon Sep 17 00:00:00 2001 From: Micky774 Date: Thu, 26 May 2022 11:33:34 -0400 Subject: [PATCH 03/15] Fixed deprecation --- sklearn/cluster/_affinity_propagation.py | 51 +++++++------------ sklearn/cluster/_agglomerative.py | 35 ++++++++++--- .../tests/test_affinity_propagation.py | 47 +++++------------ sklearn/cluster/tests/test_hierarchical.py | 37 +++++++++++--- 4 files changed, 89 insertions(+), 81 deletions(-) diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 3282724291761..f0274b113a341 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -302,14 +302,10 @@ class AffinityPropagation(ClusterMixin, BaseEstimator): they will be set to the median of the input similarities. affinity : {'euclidean', 'precomputed'}, default='euclidean' - Which metric to use. At the moment 'precomputed' and - 'euclidean' are supported. 'euclidean' uses the + Which affinity to use. At the moment 'precomputed' and + ``euclidean`` are supported. 'euclidean' uses the negative squared euclidean distance between points. - .. deprecated:: 1.2 - `affinity` was deprecated in version 1.2 and will be renamed to `metric` - in 1.4. - verbose : bool, default=False Whether to be verbose. 
@@ -417,8 +413,7 @@ def __init__( convergence_iter=15, copy=True, preference=None, - affinity="deprecated", # TODO(1.4): Remove - metric=None, # TODO(1.4): Set default as "euclidean" + affinity="euclidean", verbose=False, random_state=None, ): @@ -430,22 +425,21 @@ def __init__( self.verbose = verbose self.preference = preference self.affinity = affinity - self.metric = metric self.random_state = random_state def _more_tags(self): - return {"pairwise": self.metric == "precomputed"} + return {"pairwise": self.affinity == "precomputed"} def fit(self, X, y=None): - """Fit the clustering from features, or distance matrix. + """Fit the clustering from features, or affinity matrix. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features), or \ array-like of shape (n_samples, n_samples) - Training instances to cluster, or distances between instances if - ``metric='precomputed'``. If a sparse feature matrix is provided, - it will be converted into a sparse ``csr_matrix``. + Training instances to cluster, or similarities / affinities between + instances if ``affinity='precomputed'``. If a sparse feature matrix + is provided, it will be converted into a sparse ``csr_matrix``. y : Ignored Not used, present here for API consistency by convention. @@ -455,30 +449,19 @@ def fit(self, X, y=None): self Returns the instance itself. """ - # TODO(1.4): Remove - self._metric = self.metric or self.affinity - if self.affinity != "deprecated": - warnings.warn( - "Attribute `affinity` was deprecated in version 1.2 and will be removed" - " in 1.4. 
Use `metric` instead", - DeprecationWarning, - ) - elif self._metric == "deprecated": - self._metric = "euclidean" - - if self._metric == "precomputed": + if self.affinity == "precomputed": accept_sparse = False else: accept_sparse = "csr" X = self._validate_data(X, accept_sparse=accept_sparse) - if self._metric == "precomputed": + if self.affinity == "precomputed": self.affinity_matrix_ = X - elif self._metric == "euclidean": + elif self.affinity == "euclidean": self.affinity_matrix_ = -euclidean_distances(X, squared=True) else: raise ValueError( - "metric must be 'precomputed' or 'euclidean'. Got %s instead" - % str(self._metric) + "Affinity must be 'precomputed' or 'euclidean'. Got %s instead" + % str(self.affinity) ) check_scalar( @@ -513,7 +496,7 @@ def fit(self, X, y=None): random_state=self.random_state, ) - if self.metric != "precomputed": + if self.affinity != "precomputed": self.cluster_centers_ = X[self.cluster_centers_indices_].copy() return self @@ -536,7 +519,7 @@ def predict(self, X): X = self._validate_data(X, reset=False, accept_sparse="csr") if not hasattr(self, "cluster_centers_"): raise ValueError( - "Predict method is not supported when metric='precomputed'." + "Predict method is not supported when affinity='precomputed'." ) if self.cluster_centers_.shape[0] > 0: @@ -552,14 +535,14 @@ def predict(self, X): return np.array([-1] * X.shape[0]) def fit_predict(self, X, y=None): - """Fit clustering from features/distance matrix; return cluster labels. + """Fit clustering from features/affinity matrix; return cluster labels. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features), or \ array-like of shape (n_samples, n_samples) Training instances to cluster, or similarities / affinities between - instances if ``metric='precomputed'``. If a sparse feature matrix + instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``. 
y : Ignored diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index b399f805a9d40..9c240a8fcd5ed 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -755,6 +755,10 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): If "precomputed", a distance matrix (instead of a similarity matrix) is needed as input for the fit method. + .. deprecated:: 1.2 + `affinity` was deprecated in version 1.2 and will be renamed to `metric` + in 1.4. + memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the @@ -876,7 +880,8 @@ def __init__( self, n_clusters=2, *, - affinity="euclidean", + affinity="deprecated", # TODO(1.4): Remove + metric=None, # TODO(1.4): Set default as "euclidean" memory=None, connectivity=None, compute_full_tree="auto", @@ -891,6 +896,7 @@ def __init__( self.compute_full_tree = compute_full_tree self.linkage = linkage self.affinity = affinity + self.metric = metric self.compute_distances = compute_distances def fit(self, X, y=None): @@ -901,7 +907,7 @@ def fit(self, X, y=None): X : array-like, shape (n_samples, n_features) or \ (n_samples, n_samples) Training instances to cluster, or distances between instances if - ``affinity='precomputed'``. + ``metric='precomputed'``. y : Ignored Not used, present here for API consistency by convention. @@ -930,6 +936,17 @@ def _fit(self, X): """ memory = check_memory(self.memory) + # TODO(1.4): Remove + self._metric = self.metric or self.affinity + if self.affinity != "deprecated": + warnings.warn( + "Attribute `affinity` was deprecated in version 1.2 and will be removed" + " in 1.4. 
Use `metric` instead", + DeprecationWarning, + ) + elif self._metric == "deprecated": + self._metric = "euclidean" + if self.n_clusters is not None and self.n_clusters <= 0: raise ValueError( "n_clusters should be an integer greater than 0. %s was provided." @@ -948,10 +965,10 @@ def _fit(self, X): "compute_full_tree must be True if distance_threshold is set." ) - if self.linkage == "ward" and self.affinity != "euclidean": + if self.linkage == "ward" and self._metric != "euclidean": raise ValueError( "%s was provided as affinity. Ward can only " - "work with euclidean distances." % (self.affinity,) + "work with euclidean distances." % (self._metric,) ) if self.linkage not in _TREE_BUILDERS: @@ -989,7 +1006,7 @@ def _fit(self, X): kwargs = {} if self.linkage != "ward": kwargs["linkage"] = self.linkage - kwargs["affinity"] = self.affinity + kwargs["affinity"] = self._metric distance_threshold = self.distance_threshold @@ -1071,6 +1088,10 @@ class FeatureAgglomeration( "manhattan", "cosine", or 'precomputed'. If linkage is "ward", only "euclidean" is accepted. + .. deprecated:: 1.2 + `affinity` was deprecated in version 1.2 and will be renamed to `metric` + in 1.4. + memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. By default, no caching is done. 
If a string is given, it is the @@ -1196,7 +1217,8 @@ def __init__( self, n_clusters=2, *, - affinity="euclidean", + affinity="deprecated", # TODO(1.4): Remove + metric=None, # TODO(1.4): Set default as "euclidean" memory=None, connectivity=None, compute_full_tree="auto", @@ -1212,6 +1234,7 @@ def __init__( compute_full_tree=compute_full_tree, linkage=linkage, affinity=affinity, + metric=metric, distance_threshold=distance_threshold, compute_distances=compute_distances, ) diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index 57012ab5b69b2..e5dc5d584266d 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -45,7 +45,7 @@ def test_affinity_propagation(): assert n_clusters == n_clusters_ af = AffinityPropagation( - preference=preference, metric="precomputed", random_state=28 + preference=preference, affinity="precomputed", random_state=28 ) labels_precomputed = af.fit(S).labels_ @@ -82,11 +82,10 @@ def test_affinity_propagation_affinity_shape(): (X, {"damping": 2}, ValueError, "damping == 2, must be < 1"), (X, {"max_iter": 0}, ValueError, "max_iter == 0, must be >= 1."), (X, {"convergence_iter": 0}, ValueError, "convergence_iter == 0, must be >= 1"), - # TODO(1.4): Replace message "Affinity must be" with "metric must be" - (X, {"metric": "unknown"}, ValueError, "metric must be"), + (X, {"affinity": "unknown"}, ValueError, "Affinity must be"), ( csr_matrix((3, 3)), - {"metric": "precomputed"}, + {"affinity": "precomputed"}, TypeError, "A sparse matrix was passed, but dense data is required", ), @@ -100,7 +99,7 @@ def test_affinity_propagation_params_validation(input, params, err_type, err_msg def test_affinity_propagation_predict(): # Test AffinityPropagation.predict - af = AffinityPropagation(metric="euclidean", random_state=63) + af = AffinityPropagation(affinity="euclidean", random_state=63) labels = af.fit_predict(X) labels2 = 
af.predict(X) assert_array_equal(labels, labels2) @@ -109,13 +108,13 @@ def test_affinity_propagation_predict(): def test_affinity_propagation_predict_error(): # Test exception in AffinityPropagation.predict # Not fitted. - af = AffinityPropagation(metric="euclidean") + af = AffinityPropagation(affinity="euclidean") with pytest.raises(ValueError): af.predict(X) # Predict not supported when affinity="precomputed". S = np.dot(X, X.T) - af = AffinityPropagation(metric="precomputed", random_state=57) + af = AffinityPropagation(affinity="precomputed", random_state=57) af.fit(S) with pytest.raises(ValueError): af.predict(X) @@ -187,7 +186,7 @@ def test_affinity_propagation_predict_non_convergence(): def test_affinity_propagation_non_convergence_regressiontest(): X = np.array([[1, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 1]]) - af = AffinityPropagation(metric="euclidean", max_iter=2, random_state=34) + af = AffinityPropagation(affinity="euclidean", max_iter=2, random_state=34) msg = ( "Affinity propagation did not converge, this model may return degenerate" " cluster centers and labels." 
@@ -259,7 +258,9 @@ def test_affinity_propagation_float32(): X = np.array( [[1, 0, 0, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 0, 0, 1]], dtype="float32" ) - afp = AffinityPropagation(preference=1, metric="precomputed", random_state=0).fit(X) + afp = AffinityPropagation(preference=1, affinity="precomputed", random_state=0).fit( + X + ) expected = np.array([0, 1, 1, 2]) assert_array_equal(afp.labels_, expected) @@ -267,7 +268,7 @@ def test_affinity_propagation_float32(): def test_sparse_input_for_predict(): # Test to make sure sparse inputs are accepted for predict # (non-regression test for issue #20049) - af = AffinityPropagation(metric="euclidean", random_state=42) + af = AffinityPropagation(affinity="euclidean", random_state=42) af.fit(X) labels = af.predict(csr_matrix((2, 2))) assert_array_equal(labels, (2, 2)) @@ -276,32 +277,8 @@ def test_sparse_input_for_predict(): def test_sparse_input_for_fit_predict(): # Test to make sure sparse inputs are accepted for fit_predict # (non-regression test for issue #20049) - af = AffinityPropagation(metric="euclidean", random_state=42) + af = AffinityPropagation(affinity="euclidean", random_state=42) rng = np.random.RandomState(42) X = csr_matrix(rng.randint(0, 2, size=(5, 5))) labels = af.fit_predict(X) assert_array_equal(labels, (0, 1, 1, 2, 3)) - - -# TODO(1.4): Remove -def test_deprecate_affinity(): - est = [ - AffinityPropagation(random_state=42), - AffinityPropagation(metric="euclidean", random_state=42), - ] - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - for af in est: - af.fit(X) - af.predict(X) - af.fit_predict(X) - - af = AffinityPropagation(affinity="euclidean", random_state=42) - msg = ( - "Attribute `affinity` was deprecated in version 1.2 and will be removed in 1.4." 
- " Use `metric` instead" - ) - with pytest.warns(DeprecationWarning, match=msg): - af.fit(X) - af.predict(X) - af.fit_predict(X) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index c6607779f80fc..b49d1adc9a4b5 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -8,6 +8,7 @@ import itertools from tempfile import mkdtemp import shutil +import warnings import pytest from functools import partial @@ -243,7 +244,7 @@ def test_agglomerative_clustering(): clustering = AgglomerativeClustering( n_clusters=10, connectivity=connectivity.toarray(), - affinity="manhattan", + metric="manhattan", linkage="ward", ) with pytest.raises(ValueError): @@ -255,12 +256,12 @@ def test_agglomerative_clustering(): clustering = AgglomerativeClustering( n_clusters=10, connectivity=np.ones((n_samples, n_samples)), - affinity=affinity, + metric=affinity, linkage="complete", ) clustering.fit(X) clustering2 = AgglomerativeClustering( - n_clusters=10, connectivity=None, affinity=affinity, linkage="complete" + n_clusters=10, connectivity=None, metric=affinity, linkage="complete" ) clustering2.fit(X) assert_almost_equal( @@ -277,7 +278,7 @@ def test_agglomerative_clustering(): clustering2 = AgglomerativeClustering( n_clusters=10, connectivity=connectivity, - affinity="precomputed", + metric="precomputed", linkage="complete", ) clustering2.fit(X_dist) @@ -291,7 +292,7 @@ def test_agglomerative_clustering_memory_mapped(): """ rng = np.random.RandomState(0) Xmm = create_memmap_backed_data(rng.randn(50, 100)) - AgglomerativeClustering(affinity="euclidean", linkage="single").fit(Xmm) + AgglomerativeClustering(metric="euclidean", linkage="single").fit(Xmm) def test_ward_agglomeration(): @@ -876,7 +877,7 @@ def test_invalid_shape_precomputed_dist_matrix(): ValueError, match=r"Distance matrix should be square, got matrix of shape \(5, 3\)", ): - AgglomerativeClustering(affinity="precomputed", 
linkage="complete").fit(X) + AgglomerativeClustering(metric="precomputed", linkage="complete").fit(X) def test_precomputed_connectivity_affinity_with_2_connected_components(): @@ -916,3 +917,27 @@ def test_precomputed_connectivity_affinity_with_2_connected_components(): assert_array_equal(clusterer.labels_, clusterer_precomputed.labels_) assert_array_equal(clusterer.children_, clusterer_precomputed.children_) + + +# TODO(1.4): Remove +def test_deprecate_affinity(): + rng = np.random.RandomState(42) + X = rng.randn(50, 10) + est = [ + AgglomerativeClustering(), + AgglomerativeClustering(metric="euclidean"), + ] + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + for af in est: + af.fit(X) + af.fit_predict(X) + + af = AgglomerativeClustering(affinity="euclidean") + msg = ( + "Attribute `affinity` was deprecated in version 1.2 and will be removed in 1.4." + " Use `metric` instead" + ) + with pytest.warns(DeprecationWarning, match=msg): + af.fit(X) + af.fit_predict(X) From 07bb1d973922bb0a37c542945f82c0cf4a479c3e Mon Sep 17 00:00:00 2001 From: Micky774 Date: Thu, 26 May 2022 13:23:00 -0400 Subject: [PATCH 04/15] Added changelog entry --- doc/whats_new/v1.2.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index b4804f3c9c8b9..4d94d496acc7c 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -59,6 +59,10 @@ Changelog and both will have their defaults changed to `n_init='auto'` in 1.4. :pr:`23038` by :user:`Meekail Zain <micky774>`. +- |API| The `affinity` attribute is now deprecated for + :class:`cluster.AgglomerativeClustering` and will be renamed to `metric` in v1.4. + :pr:`23470` by :user:`Meekail Zain <micky774>`. + :mod:`sklearn.datasets` ....................... 
From 8ec54999c494be7487f4fd2acc7adbaa3093ad37 Mon Sep 17 00:00:00 2001 From: Micky774 Date: Fri, 27 May 2022 09:58:01 -0400 Subject: [PATCH 05/15] Fixed docstring --- sklearn/cluster/_agglomerative.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 9c240a8fcd5ed..32752b09c638e 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -755,9 +755,16 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): If "precomputed", a distance matrix (instead of a similarity matrix) is needed as input for the fit method. - .. deprecated:: 1.2 - `affinity` was deprecated in version 1.2 and will be renamed to `metric` - in 1.4. + .. deprecated:: 1.2 + `affinity` was deprecated in version 1.2 and will be renamed to `metric` + in 1.4. + + metric : str or callable, default='euclidean' + Metric used to compute the linkage. Can be "euclidean", "l1", "l2", + "manhattan", "cosine", or "precomputed". + If linkage is "ward", only "euclidean" is accepted. + If "precomputed", a distance matrix (instead of a similarity matrix) + is needed as input for the fit method. memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. @@ -1088,9 +1095,14 @@ class FeatureAgglomeration( "manhattan", "cosine", or 'precomputed'. If linkage is "ward", only "euclidean" is accepted. - .. deprecated:: 1.2 - `affinity` was deprecated in version 1.2 and will be renamed to `metric` - in 1.4. + .. deprecated:: 1.2 + `affinity` was deprecated in version 1.2 and will be renamed to `metric` + in 1.4. + + metric : str or callable, default='euclidean' + Metric used to compute the linkage. Can be "euclidean", "l1", "l2", + "manhattan", "cosine", or 'precomputed'. + If linkage is "ward", only "euclidean" is accepted. 
memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. From b6247254d365fc75c59b304cf24525b26c0df8e6 Mon Sep 17 00:00:00 2001 From: Micky774 Date: Fri, 27 May 2022 10:02:29 -0400 Subject: [PATCH 06/15] Corrected default param and validation --- sklearn/cluster/_agglomerative.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 32752b09c638e..c29a74237f532 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -888,7 +888,7 @@ def __init__( n_clusters=2, *, affinity="deprecated", # TODO(1.4): Remove - metric=None, # TODO(1.4): Set default as "euclidean" + metric="euclidean", memory=None, connectivity=None, compute_full_tree="auto", @@ -943,16 +943,15 @@ def _fit(self, X): """ memory = check_memory(self.memory) + self._metric = self.metric # TODO(1.4): Remove - self._metric = self.metric or self.affinity if self.affinity != "deprecated": warnings.warn( "Attribute `affinity` was deprecated in version 1.2 and will be removed" " in 1.4. 
Use `metric` instead", DeprecationWarning, ) - elif self._metric == "deprecated": - self._metric = "euclidean" + self._metric = self.affinity if self.n_clusters is not None and self.n_clusters <= 0: raise ValueError( @@ -1230,7 +1229,7 @@ def __init__( n_clusters=2, *, affinity="deprecated", # TODO(1.4): Remove - metric=None, # TODO(1.4): Set default as "euclidean" + metric="euclidean", memory=None, connectivity=None, compute_full_tree="auto", From 27773357320f9c370c5efb20455678ad0d6bfccd Mon Sep 17 00:00:00 2001 From: Micky774 Date: Fri, 27 May 2022 11:20:36 -0400 Subject: [PATCH 07/15] Fixed spacing in docstring --- sklearn/cluster/_agglomerative.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index c29a74237f532..d46f85ae8def9 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -756,8 +756,8 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): is needed as input for the fit method. .. deprecated:: 1.2 - `affinity` was deprecated in version 1.2 and will be renamed to `metric` - in 1.4. + `affinity` was deprecated in version 1.2 and will be renamed to + `metric` in 1.4. metric : str or callable, default='euclidean' Metric used to compute the linkage. Can be "euclidean", "l1", "l2", @@ -1095,8 +1095,8 @@ class FeatureAgglomeration( If linkage is "ward", only "euclidean" is accepted. .. deprecated:: 1.2 - `affinity` was deprecated in version 1.2 and will be renamed to `metric` - in 1.4. + `affinity` was deprecated in version 1.2 and will be renamed to + `metric` in 1.4. metric : str or callable, default='euclidean' Metric used to compute the linkage. 
Can be "euclidean", "l1", "l2", From 2242749e6b37c850b6768587f05a692016d919fb Mon Sep 17 00:00:00 2001 From: Micky774 Date: Wed, 1 Jun 2022 21:21:23 -0400 Subject: [PATCH 08/15] Addressed reviewer feedback --- sklearn/cluster/_agglomerative.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index d46f85ae8def9..c527c6ce37e5b 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -766,6 +766,8 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): If "precomputed", a distance matrix (instead of a similarity matrix) is needed as input for the fit method. + .. versionadded:: 1.2 + memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the @@ -973,7 +975,7 @@ def _fit(self, X): if self.linkage == "ward" and self._metric != "euclidean": raise ValueError( - "%s was provided as affinity. Ward can only " + "%s was provided as metric. Ward can only " "work with euclidean distances." % (self._metric,) ) @@ -1103,6 +1105,8 @@ class FeatureAgglomeration( "manhattan", "cosine", or 'precomputed'. If linkage is "ward", only "euclidean" is accepted. + .. versionadded:: 1.2 + memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the From 14196dcac1f811651224b0bae698816f2d833f7e Mon Sep 17 00:00:00 2001 From: Meekail Zain <34613774+Micky774@users.noreply.github.com> Date: Mon, 6 Jun 2022 13:06:47 -0400 Subject: [PATCH 09/15] Update sklearn/cluster/_agglomerative.py Co-authored-by: Thomas J. 
Fan --- sklearn/cluster/_agglomerative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index c527c6ce37e5b..b84b6db039861 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -951,7 +951,7 @@ def _fit(self, X): warnings.warn( "Attribute `affinity` was deprecated in version 1.2 and will be removed" " in 1.4. Use `metric` instead", - DeprecationWarning, + FutureWarning, ) self._metric = self.affinity From b9986d888f6374b80c3eadae049418e10aae89d6 Mon Sep 17 00:00:00 2001 From: Micky774 Date: Mon, 6 Jun 2022 13:16:25 -0400 Subject: [PATCH 10/15] Updated test --- sklearn/cluster/tests/test_hierarchical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index b49d1adc9a4b5..23eba480eb879 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -928,7 +928,7 @@ def test_deprecate_affinity(): AgglomerativeClustering(metric="euclidean"), ] with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) + warnings.simplefilter("error", FutureWarning) for af in est: af.fit(X) af.fit_predict(X) @@ -938,6 +938,6 @@ def test_deprecate_affinity(): "Attribute `affinity` was deprecated in version 1.2 and will be removed in 1.4." 
" Use `metric` instead" ) - with pytest.warns(DeprecationWarning, match=msg): + with pytest.warns(FutureWarning, match=msg): af.fit(X) af.fit_predict(X) From f5ffe9042872bf4f62047c939c1885ad3e88bc6b Mon Sep 17 00:00:00 2001 From: Micky774 Date: Thu, 9 Jun 2022 12:43:37 -0400 Subject: [PATCH 11/15] Added error handling and testing for edge-case --- sklearn/cluster/_agglomerative.py | 12 ++++++++++-- sklearn/cluster/tests/test_hierarchical.py | 8 ++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index b84b6db039861..8488355cc4e3d 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -890,7 +890,7 @@ def __init__( n_clusters=2, *, affinity="deprecated", # TODO(1.4): Remove - metric="euclidean", + metric=None, # TODO(1.4): Set to "euclidean" memory=None, connectivity=None, compute_full_tree="auto", @@ -948,12 +948,20 @@ def _fit(self, X): self._metric = self.metric # TODO(1.4): Remove if self.affinity != "deprecated": + if self.metric is not None: + raise ValueError( + "Both `affinity` and `metric` attributes were set. Attribute" + " `affinity` was deprecated in version 1.2 and will be removed in" + " 1.4. To avoid this error, only set the `metric` attribute." + ) warnings.warn( "Attribute `affinity` was deprecated in version 1.2 and will be removed" " in 1.4. 
Use `metric` instead", FutureWarning, ) self._metric = self.affinity + elif self.metric is None: + self._metric = "euclidean" if self.n_clusters is not None and self.n_clusters <= 0: raise ValueError( @@ -1233,7 +1241,7 @@ def __init__( n_clusters=2, *, affinity="deprecated", # TODO(1.4): Remove - metric="euclidean", + metric=None, # TODO(1.4): Set to "euclidean" memory=None, connectivity=None, compute_full_tree="auto", diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 23eba480eb879..32a3e0070bcfe 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -940,4 +940,12 @@ def test_deprecate_affinity(): ) with pytest.warns(FutureWarning, match=msg): af.fit(X) + with pytest.warns(FutureWarning, match=msg): + af.fit_predict(X) + + af = AgglomerativeClustering(metric="euclidean", affinity="euclidean") + msg = "Both `affinity` and `metric` attributes were set. Attribute" + with pytest.raises(ValueError, match=msg): + af.fit(X) + with pytest.raises(ValueError, match=msg): af.fit_predict(X) From dbcb021b2e5d26f8534fa016cfbad687da7397d8 Mon Sep 17 00:00:00 2001 From: Meekail Zain <34613774+Micky774@users.noreply.github.com> Date: Thu, 30 Jun 2022 16:32:09 -0400 Subject: [PATCH 12/15] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/cluster/_agglomerative.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 02fdf30a7d7e6..1932231593505 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -764,7 +764,7 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): `affinity` was deprecated in version 1.2 and will be renamed to `metric` in 1.4. - metric : str or callable, default='euclidean' + metric : str or callable, default=None Metric used to compute the linkage. 
Can be "euclidean", "l1", "l2", "manhattan", "cosine", or "precomputed". If linkage is "ward", only "euclidean" is accepted. @@ -989,11 +989,6 @@ def _fit(self, X): elif self.metric is None: self._metric = "euclidean" - if self.n_clusters is not None and self.n_clusters <= 0: - raise ValueError( - "n_clusters should be an integer greater than 0. %s was provided." - % str(self.n_clusters) - ) if not ((self.n_clusters is None) ^ (self.distance_threshold is None)): raise ValueError( @@ -1009,8 +1004,8 @@ def _fit(self, X): if self.linkage == "ward" and self._metric != "euclidean": raise ValueError( - "%s was provided as metric. Ward can only " - "work with euclidean distances." % (self._metric,) + f"{self._metric} was provided as metric. Ward can only " + "work with euclidean distances." ) tree_builder = _TREE_BUILDERS[self.linkage] From 294946acfd86e9fa575d5bd08e5e4aed8558ab40 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Thu, 30 Jun 2022 16:35:55 -0400 Subject: [PATCH 13/15] Streamlined test and renamed var --- sklearn/cluster/_agglomerative.py | 17 +++++++++-------- sklearn/cluster/tests/test_hierarchical.py | 16 +++------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 1932231593505..cd869aa44b0e3 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -766,10 +766,10 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): metric : str or callable, default=None Metric used to compute the linkage. Can be "euclidean", "l1", "l2", - "manhattan", "cosine", or "precomputed". - If linkage is "ward", only "euclidean" is accepted. - If "precomputed", a distance matrix (instead of a similarity matrix) - is needed as input for the fit method. + "manhattan", "cosine", or "precomputed". If set to `None` then + "euclidean" is used. If linkage is "ward", only "euclidean" is + accepted. 
If "precomputed", a distance matrix (instead of a similarity + matrix) is needed as input for the fit method. .. versionadded:: 1.2 @@ -989,7 +989,6 @@ def _fit(self, X): elif self.metric is None: self._metric = "euclidean" - if not ((self.n_clusters is None) ^ (self.distance_threshold is None)): raise ValueError( "Exactly one of n_clusters and " @@ -1128,10 +1127,12 @@ class FeatureAgglomeration( `affinity` was deprecated in version 1.2 and will be renamed to `metric` in 1.4. - metric : str or callable, default='euclidean' + metric : str or callable, default=None Metric used to compute the linkage. Can be "euclidean", "l1", "l2", - "manhattan", "cosine", or 'precomputed'. - If linkage is "ward", only "euclidean" is accepted. + "manhattan", "cosine", or "precomputed". If set to `None` then + "euclidean" is used. If linkage is "ward", only "euclidean" is + accepted. If "precomputed", a distance matrix (instead of a similarity + matrix) is needed as input for the fit method. .. versionadded:: 1.2 diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 01cac453e5b97..012073d1ba332 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -8,7 +8,6 @@ import itertools from tempfile import mkdtemp import shutil -import warnings import pytest from functools import partial @@ -249,17 +248,17 @@ def test_agglomerative_clustering(): clustering.fit(X) # Test using another metric than euclidean works with linkage complete - for affinity in PAIRED_DISTANCES.keys(): + for metric in PAIRED_DISTANCES.keys(): # Compare our (structured) implementation to scipy clustering = AgglomerativeClustering( n_clusters=10, connectivity=np.ones((n_samples, n_samples)), - metric=affinity, + metric=metric, linkage="complete", ) clustering.fit(X) clustering2 = AgglomerativeClustering( - n_clusters=10, connectivity=None, metric=affinity, linkage="complete" + n_clusters=10, connectivity=None, 
metric=metric, linkage="complete" ) clustering2.fit(X) assert_almost_equal( @@ -907,15 +906,6 @@ def test_precomputed_connectivity_affinity_with_2_connected_components(): def test_deprecate_affinity(): rng = np.random.RandomState(42) X = rng.randn(50, 10) - est = [ - AgglomerativeClustering(), - AgglomerativeClustering(metric="euclidean"), - ] - with warnings.catch_warnings(): - warnings.simplefilter("error", FutureWarning) - for af in est: - af.fit(X) - af.fit_predict(X) af = AgglomerativeClustering(affinity="euclidean") msg = ( From e62ed6aac96a63484da3d7094e05444f587476ed Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Mon, 4 Jul 2022 16:41:07 -0400 Subject: [PATCH 14/15] Clarified docstring per review feedback --- sklearn/cluster/_agglomerative.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index cd869aa44b0e3..90ee3336e0478 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -768,8 +768,8 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): Metric used to compute the linkage. Can be "euclidean", "l1", "l2", "manhattan", "cosine", or "precomputed". If set to `None` then "euclidean" is used. If linkage is "ward", only "euclidean" is - accepted. If "precomputed", a distance matrix (instead of a similarity - matrix) is needed as input for the fit method. + accepted. If "precomputed", a distance matrix is needed as input for + the fit method. .. versionadded:: 1.2 @@ -1131,8 +1131,8 @@ class FeatureAgglomeration( Metric used to compute the linkage. Can be "euclidean", "l1", "l2", "manhattan", "cosine", or "precomputed". If set to `None` then "euclidean" is used. If linkage is "ward", only "euclidean" is - accepted. If "precomputed", a distance matrix (instead of a similarity - matrix) is needed as input for the fit method. + accepted. If "precomputed", a distance matrix is needed as input for + the fit method. .. 
versionadded:: 1.2 From 43ff645f193a33423c7119c297a7b0e7b03c5280 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Wed, 6 Jul 2022 09:56:53 -0400 Subject: [PATCH 15/15] Updated affinity-->metric in plotting --- examples/cluster/plot_agglomerative_clustering_metrics.py | 4 ++-- examples/cluster/plot_cluster_comparison.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cluster/plot_agglomerative_clustering_metrics.py b/examples/cluster/plot_agglomerative_clustering_metrics.py index e022035ca2207..38fd3682d48ec 100644 --- a/examples/cluster/plot_agglomerative_clustering_metrics.py +++ b/examples/cluster/plot_agglomerative_clustering_metrics.py @@ -125,7 +125,7 @@ def sqr(x): # Plot clustering results for index, metric in enumerate(["cosine", "euclidean", "cityblock"]): model = AgglomerativeClustering( - n_clusters=n_clusters, linkage="average", affinity=metric + n_clusters=n_clusters, linkage="average", metric=metric ) model.fit(X) plt.figure() @@ -134,7 +134,7 @@ def sqr(x): plt.plot(X[model.labels_ == l].T, c=c, alpha=0.5) plt.axis("tight") plt.axis("off") - plt.suptitle("AgglomerativeClustering(affinity=%s)" % metric, size=20) + plt.suptitle("AgglomerativeClustering(metric=%s)" % metric, size=20) plt.show() diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py index 8b52759c79018..a9e39267411b7 100644 --- a/examples/cluster/plot_cluster_comparison.py +++ b/examples/cluster/plot_cluster_comparison.py @@ -171,7 +171,7 @@ ) average_linkage = cluster.AgglomerativeClustering( linkage="average", - affinity="cityblock", + metric="cityblock", n_clusters=params["n_clusters"], connectivity=connectivity, )