From 7b92e27bb64df53589600d4443597414f60b4983 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 11:23:06 -0400
Subject: [PATCH 01/31] initial cleanup: parameters in backticks, spelling, etc

---
 maint_tools/test_docstrings.py    |  2 +-
 sklearn/cluster/_agglomerative.py | 50 ++++++++++++++++---------------
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 587190401c61e..46da597868cdb 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -10,7 +10,7 @@
 # List of modules ignored when checking for numpydoc validation.
 DOCSTRING_IGNORE_LIST = [
     "AdditiveChi2Sampler",
-    "AgglomerativeClustering",
+    #"AgglomerativeClustering",
     "BernoulliRBM",
     "Birch",
     "CCA",
diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 16b0ff83d6d65..a2ca7d267d9b2 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -160,7 +160,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        feature matrix representing n_samples samples to be clustered
+        feature matrix representing `n_samples` samples to be clustered
 
     connectivity : sparse matrix, default=None
         connectivity matrix. Defines for each sample the neighboring samples
@@ -169,12 +169,13 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
         Default is None, i.e, the Ward algorithm is unstructured.
 
     n_clusters : int, default=None
-        Stop early the construction of the tree at n_clusters. This is
-        useful to decrease computation time if the number of clusters is
-        not small compared to the number of samples. In this case, the
-        complete tree is not computed, thus the 'children' output is of
-        limited use, and the 'parents' output should rather be used.
-        This option is valid only when specifying a connectivity matrix.
+        `n_clusters` should be less than `n_samples`. Stop early the
+        construction of the tree at `n_clusters`. This is useful to decrease
+        computation time if the number of clusters is not small compared to the
+        number of samples. In this case, the complete tree is not computed, thus
+        the 'children' output is of limited use, and the 'parents' output should
+        rather be used. This option is valid only when specifying a connectivity
+        matrix.
 
     return_distance : bool, default=None
         If True, return the distance between the clusters.
@@ -193,7 +194,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
         The number of connected components in the graph.
 
     n_leaves : int
-        The number of leaves in the tree
+        The number of leaves in the tree.
 
     parents : ndarray of shape (n_nodes,) or None
         The parent of each node. Only returned when a connectivity matrix
@@ -202,9 +203,9 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
     distances : ndarray of shape (n_nodes-1,)
         Only returned if return_distance is set to True (for compatibility).
         The distances between the centers of the nodes. `distances[i]`
-        corresponds to a weighted euclidean distance between
+        corresponds to a weighted Euclidean distance between
         the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to
-        leaves of the tree, then `distances[i]` is their unweighted euclidean
+        leaves of the tree, then `distances[i]` is their unweighted Euclidean
         distance. Distances are updated in the following way
         (from scipy.hierarchy.linkage):
 
@@ -378,16 +379,16 @@ def linkage_tree(
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        feature matrix representing n_samples samples to be clustered
+        feature matrix representing `n_samples` samples to be clustered
 
     connectivity : sparse matrix, default=None
         connectivity matrix. Defines for each sample the neighboring samples
         following a given structure of the data. The matrix is assumed to
         be symmetric and only the upper triangular half is used.
-        Default is None, i.e, the Ward algorithm is unstructured.
+        Default is `None`, i.e, the Ward algorithm is unstructured.
 
     n_clusters : int, default=None
-        Stop early the construction of the tree at n_clusters. This is
+        Stop early the construction of the tree at `n_clusters`. This is
         useful to decrease computation time if the number of clusters is
         not small compared to the number of samples. In this case, the
         complete tree is not computed, thus the 'children' output is of
@@ -397,16 +398,16 @@ def linkage_tree(
     linkage : {"average", "complete", "single"}, default="complete"
         Which linkage criteria to use. The linkage criterion determines which
         distance to use between sets of observation.
-            - average uses the average of the distances of each observation of
+            - "average" uses the average of the distances of each observation of
               the two sets
-            - complete or maximum linkage uses the maximum distances between
+            - "complete" or maximum linkage uses the maximum distances between
               all observations of the two sets.
-            - single uses the minimum of the distances between all observations
-              of the two sets.
+            - "single" uses the minimum of the distances between all
+              observations of the two sets.
 
     affinity : str or callable, default="euclidean".
         which metric to use. Can be "euclidean", "manhattan", or any
-        distance know to paired distance (see metric.pairwise)
+        distance known to paired distance (see metric.pairwise)
 
     return_distance : bool, default=False
         whether or not to return the distances between the clusters.
@@ -419,7 +420,7 @@ def linkage_tree(
         A node `i` greater than or equal to `n_samples` is a non-leaf
         node and has children `children_[i - n_samples]`. Alternatively
         at the i-th iteration, children[i][0] and children[i][1]
-        are merged to form node `n_samples + i`
+        are merged to form node `n_samples + i`.
 
     n_connected_components : int
         The number of connected components in the graph.
@@ -432,7 +433,7 @@ def linkage_tree(
         is specified, elsewhere 'None' is returned.
 
     distances : ndarray of shape (n_nodes-1,)
-        Returned when return_distance is set to True.
+        Returned when `return_distance` is set to `True`.
 
         distances[i] refers to the distance between children[i][0] and
         children[i][1] when they are merged.
@@ -677,7 +678,7 @@ def _hc_cut(n_clusters, children, n_leaves):
         A node `i` greater than or equal to `n_samples` is a non-leaf
         node and has children `children_[i - n_samples]`. Alternatively
         at the i-th iteration, children[i][0] and children[i][1]
-        are merged to form node `n_samples + i`
+        are merged to form node `n_samples + i`.
 
     n_leaves : int
         Number of leaves of the tree.
@@ -685,7 +686,7 @@ def _hc_cut(n_clusters, children, n_leaves):
     Returns
     -------
     labels : array [n_samples]
-        cluster labels for each point
+        cluster labels for each point.
 
     """
     if n_clusters > n_leaves:
@@ -747,7 +748,7 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
         samples following a given structure of the data.
         This can be a connectivity matrix itself or a callable that transforms
         the data into a connectivity matrix, such as derived from
-        kneighbors_graph. Default is ``None``, i.e, the
+        `kneighbors_graph`. Default is ``None``, i.e, the
         hierarchical clustering algorithm is unstructured.
 
     compute_full_tree : 'auto' or bool, default='auto'
@@ -1167,7 +1168,8 @@ def fit(self, X, y=None, **params):
 
         Returns
         -------
-        self
+        self : object
+            Returns the transformer.
         """
         X = self._validate_data(
             X,

From cd65b5fd0b45dbd03ef090f10d7fbe7a3aa63de3 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 11:41:24 -0400
Subject: [PATCH 02/31] more cleanup: make consistent use of quotes

---
 sklearn/cluster/_agglomerative.py | 37 ++++++++++++++++---------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index a2ca7d267d9b2..93b1b24ac46f0 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -395,18 +395,18 @@ def linkage_tree(
         limited use, and the 'parents' output should rather be used.
         This option is valid only when specifying a connectivity matrix.
 
-    linkage : {"average", "complete", "single"}, default="complete"
+    linkage : {'average', 'complete', 'single'}, default='complete'
         Which linkage criteria to use. The linkage criterion determines which
         distance to use between sets of observation.
-            - "average" uses the average of the distances of each observation of
+            - 'average' uses the average of the distances of each observation of
               the two sets
-            - "complete" or maximum linkage uses the maximum distances between
+            - 'complete' or maximum linkage uses the maximum distances between
               all observations of the two sets.
-            - "single" uses the minimum of the distances between all
+            - 'single' uses the minimum of the distances between all
               observations of the two sets.
 
-    affinity : str or callable, default="euclidean".
-        which metric to use. Can be "euclidean", "manhattan", or any
+    affinity : str or callable, default='euclidean'.
+        which metric to use. Can be 'euclidean', 'manhattan', or any
         distance known to paired distance (see metric.pairwise)
 
     return_distance : bool, default=False
@@ -686,7 +686,7 @@ def _hc_cut(n_clusters, children, n_leaves):
     Returns
     -------
     labels : array [n_samples]
-        cluster labels for each point.
+        Cluster labels for each point.
 
     """
     if n_clusters > n_leaves:
@@ -720,8 +720,7 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
     """
     Agglomerative Clustering
 
-    Recursively merges the pair of clusters that minimally increases
-    a given linkage distance.
+    Recursively merges pair of clusters of sample data; uses linkage distance.
 
     Read more in the :ref:`User Guide <hierarchical_clustering>`.
 
@@ -801,7 +800,7 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
         ``n_clusters``.
 
     labels_ : ndarray of shape (n_samples)
-        cluster labels for each point
+        Cluster labels for each point.
 
     n_leaves_ : int
         Number of leaves in the hierarchical tree.
@@ -823,13 +822,16 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
         A node `i` greater than or equal to `n_samples` is a non-leaf
         node and has children `children_[i - n_samples]`. Alternatively
         at the i-th iteration, children[i][0] and children[i][1]
-        are merged to form node `n_samples + i`
+        are merged to form node `n_samples + i`.
 
     distances_ : array-like of shape (n_nodes-1,)
         Distances between nodes in the corresponding place in `children_`.
         Only computed if `distance_threshold` is used or `compute_distances`
         is set to `True`.
 
+    See Also
+    ----------
+
     Examples
     --------
     >>> from sklearn.cluster import AgglomerativeClustering
@@ -841,7 +843,6 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
     AgglomerativeClustering()
     >>> clustering.labels_
     array([1, 1, 1, 0, 0, 0])
-
     """
 
     def __init__(
@@ -1052,12 +1053,12 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         distance to use between sets of features. The algorithm will merge
         the pairs of cluster that minimize this criterion.
 
-        - ward minimizes the variance of the clusters being merged.
-        - average uses the average of the distances of each feature of
-          the two sets.
-        - complete or maximum linkage uses the maximum distances between
+        - 'ward' minimizes the variance of the clusters being merged.
+        - 'complete' or maximum linkage uses the maximum distances between
           all features of the two sets.
-        - single uses the minimum of the distances between all features
+        - 'average' uses the average of the distances of each feature of
+          the two sets.
+        - 'single' uses the minimum of the distances between all features
           of the two sets.
 
     pooling_func : callable, default=np.mean
@@ -1087,7 +1088,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         ``n_clusters``.
 
     labels_ : array-like of (n_features,)
-        cluster labels for each feature.
+        Cluster labels for each feature.
 
     n_leaves_ : int
         Number of leaves in the hierarchical tree.

From d4d05dae84b3e145ef26a2a463ec9e672c3a7371 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 12:43:03 -0400
Subject: [PATCH 03/31] fix AgglomerativeClustering docstring validation errors

---
 sklearn/cluster/_agglomerative.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 93b1b24ac46f0..c7432a34ffd28 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -160,7 +160,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        feature matrix representing `n_samples` samples to be clustered
+        feature matrix representing `n_samples` samples to be clustered.
 
     connectivity : sparse matrix, default=None
         connectivity matrix. Defines for each sample the neighboring samples
@@ -188,7 +188,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
         A node `i` greater than or equal to `n_samples` is a non-leaf
         node and has children `children_[i - n_samples]`. Alternatively
         at the i-th iteration, children[i][0] and children[i][1]
-        are merged to form node `n_samples + i`
+        are merged to form node `n_samples + i`.
 
     n_connected_components : int
         The number of connected components in the graph.
@@ -687,7 +687,6 @@ def _hc_cut(n_clusters, children, n_leaves):
     -------
     labels : array [n_samples]
         Cluster labels for each point.
-
     """
     if n_clusters > n_leaves:
         raise ValueError(
@@ -880,7 +879,8 @@ def fit(self, X, y=None):
 
         Returns
         -------
-        self
+        self : object
+            Returns the cluster.
         """
         X = self._validate_data(X, ensure_min_samples=2, estimator=self)
         memory = check_memory(self.memory)
@@ -983,8 +983,7 @@ def fit(self, X, y=None):
         return self
 
     def fit_predict(self, X, y=None):
-        """Fit the hierarchical clustering from features or distance matrix,
-        and return cluster labels.
+        """Fit the hierarchical clustering from features, or distance matrix.
 
         Parameters
         ----------
@@ -1007,8 +1006,7 @@ def fit_predict(self, X, y=None):
 class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
     """Agglomerate features.
 
-    Similar to AgglomerativeClustering, but recursively merges features
-    instead of samples.
+    Recursively merges pair of clusters of features.
 
     Read more in the :ref:`User Guide <hierarchical_clustering>`.
 

From 175c3baa3062f89b981fbb15f71e6276d3006598 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 18:43:14 -0400
Subject: [PATCH 04/31] changing default of return_distance=False (instead of
 None)

---
 sklearn/cluster/_agglomerative.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index c7432a34ffd28..e2d9f7fb55e43 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -177,8 +177,8 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
         rather be used. This option is valid only when specifying a connectivity
         matrix.
 
-    return_distance : bool, default=None
-        If True, return the distance between the clusters.
+    return_distance : bool, default=False
+        If `True`, return the distance between the clusters.
 
     Returns
     -------

From c53fba57217e76af6abd58f289c1f3a2d66be8eb Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 19:03:03 -0400
Subject: [PATCH 05/31] added period after 'Agglomerative Clustering' in class

---
 sklearn/cluster/_agglomerative.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index e2d9f7fb55e43..13ebb5634a107 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -201,7 +201,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
         is specified, elsewhere 'None' is returned.
 
     distances : ndarray of shape (n_nodes-1,)
-        Only returned if return_distance is set to True (for compatibility).
+        Only returned if `return_distance` is set to `True` (for compatibility).
         The distances between the centers of the nodes. `distances[i]`
         corresponds to a weighted Euclidean distance between
         the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to
@@ -717,7 +717,7 @@ def _hc_cut(n_clusters, children, n_leaves):
 
 class AgglomerativeClustering(ClusterMixin, BaseEstimator):
     """
-    Agglomerative Clustering
+    Agglomerative Clustering.
 
     Recursively merges pair of clusters of sample data; uses linkage distance.
 
@@ -829,7 +829,8 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
         is set to `True`.
 
     See Also
-    ----------
+    --------
+    ward_tree : Hierarchical clustering with ward linkage.
 
     Examples
     --------

From b3d7fa3a9e4f3854fc44c38b94a32e7da4e90af2 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 19:30:31 -0400
Subject: [PATCH 06/31] remove AgglomerativeClustering from test_docstrings.py
 file

---
 maint_tools/test_docstrings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 46da597868cdb..48724663fdb27 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -10,7 +10,6 @@
 # List of modules ignored when checking for numpydoc validation.
 DOCSTRING_IGNORE_LIST = [
     "AdditiveChi2Sampler",
-    #"AgglomerativeClustering",
     "BernoulliRBM",
     "Birch",
     "CCA",

From 5cd5d8ef5f9eae8fd5cc97b389d8b21876ac4a3b Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 19:36:10 -0400
Subject: [PATCH 07/31] missing period for X parameter, line 380

---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 13ebb5634a107..c7da9c34c2c3a 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -379,7 +379,7 @@ def linkage_tree(
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        feature matrix representing `n_samples` samples to be clustered
+        feature matrix representing `n_samples` samples to be clustered.
 
     connectivity : sparse matrix, default=None
         connectivity matrix. Defines for each sample the neighboring samples

From b4023667f8b9e2fd292b22a734ca3ef7adf63113 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 16 Jul 2021 19:51:31 -0400
Subject: [PATCH 08/31] formatting, for consistency

---
 sklearn/cluster/_agglomerative.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index c7da9c34c2c3a..c10745f7fa9a7 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -399,7 +399,7 @@ def linkage_tree(
         Which linkage criteria to use. The linkage criterion determines which
         distance to use between sets of observation.
             - 'average' uses the average of the distances of each observation of
-              the two sets
+              the two sets.
             - 'complete' or maximum linkage uses the maximum distances between
               all observations of the two sets.
             - 'single' uses the minimum of the distances between all
@@ -407,7 +407,7 @@ def linkage_tree(
 
     affinity : str or callable, default='euclidean'.
         which metric to use. Can be 'euclidean', 'manhattan', or any
-        distance known to paired distance (see metric.pairwise)
+        distance known to paired distance (see metric.pairwise).
 
     return_distance : bool, default=False
         whether or not to return the distances between the clusters.
@@ -1032,11 +1032,11 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         features following a given structure of the data.
         This can be a connectivity matrix itself or a callable that transforms
         the data into a connectivity matrix, such as derived from
-        kneighbors_graph. Default is None, i.e, the
+        kneighbors_graph. Default is `None`, i.e, the
         hierarchical clustering algorithm is unstructured.
 
     compute_full_tree : 'auto' or bool, default='auto'
-        Stop early the construction of the tree at n_clusters. This is useful
+        Stop early the construction of the tree at `n_clusters`. This is useful
         to decrease computation time if the number of clusters is not small
         compared to the number of features. This option is useful only when
         specifying a connectivity matrix. Note also that when varying the
@@ -1109,7 +1109,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         A node `i` greater than or equal to `n_features` is a non-leaf
         node and has children `children_[i - n_features]`. Alternatively
         at the i-th iteration, children[i][0] and children[i][1]
-        are merged to form node `n_features + i`
+        are merged to form node `n_features + i`.
 
     distances_ : array-like of shape (n_nodes-1,)
         Distances between nodes in the corresponding place in `children_`.
@@ -1162,9 +1162,10 @@ def fit(self, X, y=None, **params):
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
-            The data
+            The data.
 
         y : Ignored
+            Not used, present here for API consistency by convention.
 
         Returns
         -------

From e57dcebe02380a28d5d99da9d13bd68f2655abef Mon Sep 17 00:00:00 2001
From: Reshama Shaikh <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 09:25:27 -0400
Subject: [PATCH 09/31] Update sklearn/cluster/_agglomerative.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index c10745f7fa9a7..7a55d04dc995e 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -160,7 +160,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        feature matrix representing `n_samples` samples to be clustered.
+        Feature matrix representing `n_samples` samples to be clustered.
 
     connectivity : sparse matrix, default=None
         connectivity matrix. Defines for each sample the neighboring samples

From cff46d145efb5dd5dfa79ffd0a193f052ca0b7a6 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 09:25:51 -0400
Subject: [PATCH 10/31] Update sklearn/cluster/_agglomerative.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 7a55d04dc995e..65b4166645b7b 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -1032,7 +1032,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         features following a given structure of the data.
         This can be a connectivity matrix itself or a callable that transforms
         the data into a connectivity matrix, such as derived from
-        kneighbors_graph. Default is `None`, i.e, the
+        `kneighbors_graph`. Default is `None`, i.e, the
         hierarchical clustering algorithm is unstructured.
 
     compute_full_tree : 'auto' or bool, default='auto'

From fe900704bea1080413eb22133695b4f26ab1772c Mon Sep 17 00:00:00 2001
From: Reshama Shaikh <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 09:26:29 -0400
Subject: [PATCH 11/31] Update sklearn/cluster/_agglomerative.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 65b4166645b7b..711f638d94121 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -379,7 +379,7 @@ def linkage_tree(
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        feature matrix representing `n_samples` samples to be clustered.
+        Feature matrix representing `n_samples` samples to be clustered.
 
     connectivity : sparse matrix, default=None
         connectivity matrix. Defines for each sample the neighboring samples

From cec6bcd32317a0fd9a8859d9af205333d13b50e7 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 09:29:37 -0400
Subject: [PATCH 12/31] remove `FeatureAgglomeration` from test_docstrings.py

---
 maint_tools/test_docstrings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 3215f00f61a58..c47b8839e704d 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -28,7 +28,6 @@
     "ExtraTreesRegressor",
     "FactorAnalysis",
     "FastICA",
-    "FeatureAgglomeration",
     "FeatureHasher",
     "FeatureUnion",
     "FunctionTransformer",

From f0c4764bcc95f2998e10a7f28944a01524a2eb41 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 09:33:05 -0400
Subject: [PATCH 13/31] change linkage options from single to double quotes

---
 sklearn/cluster/_agglomerative.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 711f638d94121..33388f84a61f7 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -395,14 +395,14 @@ def linkage_tree(
         limited use, and the 'parents' output should rather be used.
         This option is valid only when specifying a connectivity matrix.
 
-    linkage : {'average', 'complete', 'single'}, default='complete'
+    linkage : {"average", "complete", "single"}, default="complete"
         Which linkage criteria to use. The linkage criterion determines which
         distance to use between sets of observation.
-            - 'average' uses the average of the distances of each observation of
+            - "average" uses the average of the distances of each observation of
               the two sets.
-            - 'complete' or maximum linkage uses the maximum distances between
+            - "complete" or maximum linkage uses the maximum distances between
               all observations of the two sets.
-            - 'single' uses the minimum of the distances between all
+            - "single" uses the minimum of the distances between all
               observations of the two sets.
 
     affinity : str or callable, default='euclidean'.

From 876502b1c4fbf89a907ae06d1e2e65eacab8388e Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 09:35:38 -0400
Subject: [PATCH 14/31] for linkage parameter, change quotes from single to
 double

---
 sklearn/cluster/_agglomerative.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 33388f84a61f7..f2acd9ba40ad6 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -1047,17 +1047,17 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         is inferior to the maximum between 100 or `0.02 * n_samples`.
         Otherwise, "auto" is equivalent to `False`.
 
-    linkage : {'ward', 'complete', 'average', 'single'}, default='ward'
+    linkage : {"ward", "complete", "average", "single"}, default="ward"
         Which linkage criterion to use. The linkage criterion determines which
         distance to use between sets of features. The algorithm will merge
         the pairs of cluster that minimize this criterion.
 
-        - 'ward' minimizes the variance of the clusters being merged.
-        - 'complete' or maximum linkage uses the maximum distances between
+        - "ward" minimizes the variance of the clusters being merged.
+        - "complete" or maximum linkage uses the maximum distances between
           all features of the two sets.
-        - 'average' uses the average of the distances of each feature of
+        - "average" uses the average of the distances of each feature of
           the two sets.
-        - 'single' uses the minimum of the distances between all features
+        - "single" uses the minimum of the distances between all features
           of the two sets.
 
     pooling_func : callable, default=np.mean

From 902a14f95d83b78763de591f3f6124d8d0784b31 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 10:40:22 -0400
Subject: [PATCH 15/31] removed unnecessary period; added a period to fit
 docstring summary

---
 sklearn/cluster/_agglomerative.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index f2acd9ba40ad6..da26df9603f89 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -405,7 +405,7 @@ def linkage_tree(
             - "single" uses the minimum of the distances between all
               observations of the two sets.
 
-    affinity : str or callable, default='euclidean'.
+    affinity : str or callable, default='euclidean'
         which metric to use. Can be 'euclidean', 'manhattan', or any
         distance known to paired distance (see metric.pairwise).
 
@@ -1157,7 +1157,7 @@ def __init__(
         self.pooling_func = pooling_func
 
     def fit(self, X, y=None, **params):
-        """Fit the hierarchical clustering on the data
+        """Fit the hierarchical clustering on the data.
 
         Parameters
         ----------

From 10f4b0ed5909f8a67d7ce0a55fb8938fb3acfc3a Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Tue, 20 Jul 2021 11:28:46 -0400
Subject: [PATCH 16/31] fixed missing periods in _feature_agglomeration.py file

---
 sklearn/cluster/_feature_agglomeration.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index e6e03d57651b7..a5624e56351cf 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -17,12 +17,12 @@
 
 class AgglomerationTransform(TransformerMixin):
     """
-    A class for feature agglomeration via the transform interface
+    A class for feature agglomeration via the transform interface.
     """
 
     def transform(self, X):
         """
-        Transform a new matrix using the built clustering
+        Transform a new matrix using the built clustering.
 
         Parameters
         ----------
@@ -57,12 +57,12 @@ def inverse_transform(self, Xred):
         """
         Inverse the transformation.
         Return a vector of size nb_features with the values of Xred assigned
-        to each group of features
+        to each group of features.
 
         Parameters
         ----------
         Xred : array-like of shape (n_samples, n_clusters) or (n_clusters,)
-            The values to be assigned to each cluster of samples
+            The values to be assigned to each cluster of samples.
 
         Returns
         -------

From 327cb222fd62fb03bd05e1bcbb342a20a57f8c82 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 23 Jul 2021 12:21:22 -0400
Subject: [PATCH 17/31] formatting changes

---
 sklearn/cluster/_agglomerative.py         | 4 ++--
 sklearn/cluster/_feature_agglomeration.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index da26df9603f89..ef99451a4356a 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -33,12 +33,12 @@
 
 def _fix_connectivity(X, connectivity, affinity):
     """
-    Fixes the connectivity matrix
+    Fixes the connectivity matrix.
 
         - copies it
         - makes it symmetric
         - converts it to LIL if necessary
-        - completes it if necessary
+        - completes it if necessary.
     """
     n_samples = X.shape[0]
     if connectivity.shape[0] != n_samples or connectivity.shape[1] != n_samples:
diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index a5624e56351cf..b9e3639810ddd 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -67,7 +67,7 @@ def inverse_transform(self, Xred):
         Returns
         -------
         X : ndarray of shape (n_samples, n_features) or (n_features,)
-            A vector of size n_samples with the values of Xred assigned to
+            A vector of size `n_samples` with the values of `Xred` assigned to
             each of the cluster of samples.
         """
         check_is_fitted(self)

From 6dbf04bcf62caf92cd1012aac119d1525c4958f2 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Tue, 27 Jul 2021 14:56:12 -0400
Subject: [PATCH 18/31] fix merge conflicts in test_docstrings.py

---
 maint_tools/test_docstrings.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 85171a98c9251..7cef24909b0d4 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -9,14 +9,8 @@
 
 # List of modules ignored when checking for numpydoc validation.
 DOCSTRING_IGNORE_LIST = [
-<<<<<<< HEAD
-<<<<<<< HEAD
     "AdditiveChi2Sampler",
-=======
-    "AgglomerativeClustering",
->>>>>>> 2844f592be6eba36d952a4a1ad68cc41e2845c27
-=======
->>>>>>> 7c1aef604e1d709f5cebaf99246c086be8557523
+    #"AgglomerativeClustering",
     "BernoulliRBM",
     "Birch",
     "CalibratedClassifierCV",
@@ -31,14 +25,8 @@
     "ExtraTreesClassifier",
     "ExtraTreesRegressor",
     "FactorAnalysis",
-<<<<<<< HEAD
-<<<<<<< HEAD
     "FastICA",
-=======
-    "FeatureAgglomeration",
->>>>>>> 2844f592be6eba36d952a4a1ad68cc41e2845c27
-=======
->>>>>>> 7c1aef604e1d709f5cebaf99246c086be8557523
+    #"FeatureAgglomeration",
     "FeatureHasher",
     "FeatureUnion",
     "FunctionTransformer",

From 682b3a9bd16cf5668f0c0807928cebe6ec6be97f Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Wed, 28 Jul 2021 12:27:50 -0400
Subject: [PATCH 19/31] adding in parameters docstring

---
 sklearn/cluster/_agglomerative.py         | 32 +++++++++++++++++++++--
 sklearn/cluster/_feature_agglomeration.py |  4 +--
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 974a185aa1d92..02919775f87ba 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -39,6 +39,27 @@ def _fix_connectivity(X, connectivity, affinity):
         - makes it symmetric
         - converts it to LIL if necessary
         - completes it if necessary.
+    
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Feature matrix representing `n_samples` samples to be clustered.
+
+    connectivity : sparse matrix, default=None
+        Connectivity matrix. Defines for each sample the neighboring samples
+        following a given structure of the data. The matrix is assumed to
+        be symmetric and only the upper triangular half is used.
+        Default is `None`, i.e, the Ward algorithm is unstructured.
+
+    affinity : {"euclidean", "precomputed"}, default="euclidean"
+        Which affinity to use. At the moment `precomputed` and
+        ``euclidean`` are supported. `euclidean` uses the
+        negative squared Euclidean distance between points.
+
+    Returns
+    -------
+    n_connected_components : int
+        The number of connected components in the graph.
     """
     n_samples = X.shape[0]
     if connectivity.shape[0] != n_samples or connectivity.shape[1] != n_samples:
@@ -166,7 +187,7 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
         Feature matrix representing `n_samples` samples to be clustered.
 
     connectivity : sparse matrix, default=None
-        connectivity matrix. Defines for each sample the neighboring samples
+        Connectivity matrix. Defines for each sample the neighboring samples
         following a given structure of the data. The matrix is assumed to
         be symmetric and only the upper triangular half is used.
         Default is None, i.e, the Ward algorithm is unstructured.
@@ -385,7 +406,7 @@ def linkage_tree(
         Feature matrix representing `n_samples` samples to be clustered.
 
     connectivity : sparse matrix, default=None
-        connectivity matrix. Defines for each sample the neighboring samples
+        Connectivity matrix. Defines for each sample the neighboring samples
         following a given structure of the data. The matrix is assumed to
         be symmetric and only the upper triangular half is used.
         Default is `None`, i.e, the Ward algorithm is unstructured.
@@ -1119,6 +1140,10 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         Only computed if `distance_threshold` is used or `compute_distances`
         is set to `True`.
 
+    See Also
+    --------
+    ward_tree : Hierarchical clustering with ward linkage.
+
     Examples
     --------
     >>> import numpy as np
@@ -1170,6 +1195,9 @@ def fit(self, X, y=None, **params):
         y : Ignored
             Not used, present here for API consistency by convention.
 
+        **params : dictionary of keyword arguments
+            Additional fit parameters.
+
         Returns
         -------
         self : object
diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index b9e3639810ddd..145e9c4ae86c5 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -55,9 +55,7 @@ def transform(self, X):
 
     def inverse_transform(self, Xred):
         """
-        Inverse the transformation.
-        Return a vector of size nb_features with the values of Xred assigned
-        to each group of features.
+        Inverse the transformation and return a vector of size nb_features.
 
         Parameters
         ----------

From e17ba51dee372e340532b716d3a54b89abeacdb1 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Wed, 28 Jul 2021 13:37:59 -0400
Subject: [PATCH 20/31] trying to fix docstring error

---
 sklearn/cluster/_agglomerative.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 02919775f87ba..df0fedd325e18 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -120,6 +120,15 @@ def _single_linkage_tree(
     Perform single linkage clustering on sparse data via the minimum
     spanning tree from scipy.sparse.csgraph, then using union-find to label.
     The parent array is then generated by walking through the tree.
+
+    Parameters
+    ----------
+    connectivity : sparse matrix, default=None
+        Connectivity matrix. Defines for each sample the neighboring samples
+        following a given structure of the data. The matrix is assumed to
+        be symmetric and only the upper triangular half is used.
+        Default is `None`, i.e, the Ward algorithm is unstructured.
+
     """
     from scipy.sparse.csgraph import minimum_spanning_tree
 

From 95fadab4243319531b46a1b535a9b98caaee92d7 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Wed, 28 Jul 2021 13:39:10 -0400
Subject: [PATCH 21/31] removed two classes from test_docstrings.py

---
 maint_tools/test_docstrings.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 020b2b67d2299..404808404d612 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -10,7 +10,6 @@
 # List of modules ignored when checking for numpydoc validation.
 DOCSTRING_IGNORE_LIST = [
     "AdditiveChi2Sampler",
-    #"AgglomerativeClustering",
     "BernoulliRBM",
     "Birch",
     "CalibratedClassifierCV",
@@ -24,7 +23,6 @@
     "ExtraTreeRegressor",
     "FactorAnalysis",
     "FastICA",
-    #"FeatureAgglomeration",
     "FeatureHasher",
     "FeatureUnion",
     "FunctionTransformer",

From 3675bd322053d2041a57db1cdc82529175252e0d Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Wed, 28 Jul 2021 13:55:24 -0400
Subject: [PATCH 22/31] ran black on _agglomerative.py

---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index df0fedd325e18..465a2678eab22 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -39,7 +39,7 @@ def _fix_connectivity(X, connectivity, affinity):
         - makes it symmetric
         - converts it to LIL if necessary
         - completes it if necessary.
-    
+
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)

From 4006aecf0fd913be97dedffa07125b874b2aab0e Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Wed, 28 Jul 2021 14:06:10 -0400
Subject: [PATCH 23/31] remove AdditiveChi2Sampler and FastICA from
 test_docstrings.py

---
 maint_tools/test_docstrings.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 404808404d612..ee0cf1210601f 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -9,7 +9,6 @@
 
 # List of modules ignored when checking for numpydoc validation.
 DOCSTRING_IGNORE_LIST = [
-    "AdditiveChi2Sampler",
     "BernoulliRBM",
     "Birch",
     "CalibratedClassifierCV",
@@ -22,7 +21,6 @@
     "ElasticNetCV",
     "ExtraTreeRegressor",
     "FactorAnalysis",
-    "FastICA",
     "FeatureHasher",
     "FeatureUnion",
     "FunctionTransformer",

From 261063bae6b9bab79e3deeb64448699453070653 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 30 Jul 2021 09:54:58 -0400
Subject: [PATCH 24/31] removed duplicate text for docstring for def fit

---
 sklearn/cluster/_agglomerative.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 465a2678eab22..2b9f1025553d3 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -1017,7 +1017,8 @@ def fit(self, X, y=None):
         return self
 
     def fit_predict(self, X, y=None):
-        """Fit the hierarchical clustering from features, or distance matrix.
+        """Fit and return the result of the clustering assignment for each
+        sample in the training set.
 
         Parameters
         ----------

From 5f5bfd0f0a8c50d0300af7bb55b923556f3210d4 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Fri, 30 Jul 2021 10:32:50 -0400
Subject: [PATCH 25/31] edit docstring so it is only one line

---
 sklearn/cluster/_agglomerative.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 2b9f1025553d3..1a2455ffb5f83 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -1017,8 +1017,7 @@ def fit(self, X, y=None):
         return self
 
     def fit_predict(self, X, y=None):
-        """Fit and return the result of the clustering assignment for each
-        sample in the training set.
+        """Fit and return the result of each sample's clustering assignment.
 
         Parameters
         ----------

From 296aa0ca0b99e16edab807398332942063141478 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Mon, 2 Aug 2021 11:17:24 -0400
Subject: [PATCH 26/31] line 907: removing shape of n_samples by n_samples,
 doesn t seem right

---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 1a2455ffb5f83..e663c9fb94a09 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -904,7 +904,7 @@ def fit(self, X, y=None):
 
         Parameters
         ----------
-        X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)
+        X : array-like, shape (n_samples, n_features)
             Training instances to cluster, or distances between instances if
             ``affinity='precomputed'``.
 

From a11aad3d4a3e7605b5ffbf0cba3d86eb992fe073 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Mon, 2 Aug 2021 11:18:39 -0400
Subject: [PATCH 27/31] try (n_samples,)

---
 sklearn/cluster/_agglomerative.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index e663c9fb94a09..9cbec752f8cfc 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -904,7 +904,7 @@ def fit(self, X, y=None):
 
         Parameters
         ----------
-        X : array-like, shape (n_samples, n_features)
+        X : array-like, shape (n_samples, n_features) or (n_samples,)
             Training instances to cluster, or distances between instances if
             ``affinity='precomputed'``.
 

From 59d960ed711ea9f10f1dc6cc3600c96513268681 Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Mon, 2 Aug 2021 12:45:45 -0400
Subject: [PATCH 28/31] yes, it should be (n_samples, n_samples)!

---
 sklearn/cluster/_agglomerative.py         | 3 ++-
 sklearn/cluster/_feature_agglomeration.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 9cbec752f8cfc..0ae65f5585241 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -904,7 +904,8 @@ def fit(self, X, y=None):
 
         Parameters
         ----------
-        X : array-like, shape (n_samples, n_features) or (n_samples,)
+        X : array-like, shape (n_samples, n_features) or \
+                (n_samples, n_samples)
             Training instances to cluster, or distances between instances if
             ``affinity='precomputed'``.
 
diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 145e9c4ae86c5..827dcc72cc804 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -26,7 +26,8 @@ def transform(self, X):
 
         Parameters
         ----------
-        X : array-like of shape (n_samples, n_features) or (n_samples,)
+        X : array-like of shape (n_samples, n_features) or \
+                (n_samples, n_samples)
             A M by N array of M observations in N dimensions or a length
             M array of M one-dimensional observations.
 

From 79dd56109ef90d96a9cc3f8cb353fdbac419bfcf Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Mon, 2 Aug 2021 13:19:44 -0400
Subject: [PATCH 29/31] added docstring to @property of def fit_predict

---
 sklearn/cluster/_agglomerative.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 0ae65f5585241..ed6f2d485c78c 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -1228,4 +1228,6 @@ def fit(self, X, y=None, **params):
 
     @property
     def fit_predict(self):
+        """Fit and return the result of each sample's clustering assignment.
+        """
         raise AttributeError

From 9e9a77649c0172490e4849546769367259efae2f Mon Sep 17 00:00:00 2001
From: reshamas <reshama.stat@gmail.com>
Date: Mon, 2 Aug 2021 13:27:34 -0400
Subject: [PATCH 30/31] ran black on _agglomerative.py

---
 sklearn/cluster/_agglomerative.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index ed6f2d485c78c..cbb26aad7dd48 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -1228,6 +1228,5 @@ def fit(self, X, y=None, **params):
 
     @property
     def fit_predict(self):
-        """Fit and return the result of each sample's clustering assignment.
-        """
+        """Fit and return the result of each sample's clustering assignment."""
         raise AttributeError

From 60df0b901ca8d470cba6b5d36de163d860c10fee Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 31 Aug 2021 18:13:12 +0200
Subject: [PATCH 31/31] small changes

---
 sklearn/cluster/_agglomerative.py         | 31 +++++++++++++----------
 sklearn/cluster/_feature_agglomeration.py |  2 +-
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 51aa72befed1f..77f159e0e996f 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -36,10 +36,12 @@ def _fix_connectivity(X, connectivity, affinity):
     """
     Fixes the connectivity matrix.
 
-        - copies it
-        - makes it symmetric
-        - converts it to LIL if necessary
-        - completes it if necessary.
+    The different steps are:
+
+    - copies it
+    - makes it symmetric
+    - converts it to LIL if necessary
+    - completes it if necessary.
 
     Parameters
     ----------
@@ -59,6 +61,9 @@ def _fix_connectivity(X, connectivity, affinity):
 
     Returns
     -------
+    connectivity : sparse matrix
+        The fixed connectivity matrix.
+
     n_connected_components : int
         The number of connected components in the graph.
     """
@@ -114,15 +119,6 @@ def _single_linkage_tree(
     Perform single linkage clustering on sparse data via the minimum
     spanning tree from scipy.sparse.csgraph, then using union-find to label.
     The parent array is then generated by walking through the tree.
-
-    Parameters
-    ----------
-    connectivity : sparse matrix, default=None
-        Connectivity matrix. Defines for each sample the neighboring samples
-        following a given structure of the data. The matrix is assumed to
-        be symmetric and only the upper triangular half is used.
-        Default is `None`, i.e, the Ward algorithm is unstructured.
-
     """
     from scipy.sparse.csgraph import minimum_spanning_tree
 
@@ -856,6 +852,8 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
 
     See Also
     --------
+    FeatureAgglomeration : Agglomerative clustering but for features instead of
+        samples.
     ward_tree : Hierarchical clustering with ward linkage.
 
     Examples
@@ -908,7 +906,7 @@ def fit(self, X, y=None):
         Returns
         -------
         self : object
-            Returns the cluster.
+            Returns the fitted instance.
         """
         X = self._validate_data(X, ensure_min_samples=2, estimator=self)
         memory = check_memory(self.memory)
@@ -1013,6 +1011,9 @@ def fit(self, X, y=None):
     def fit_predict(self, X, y=None):
         """Fit and return the result of each sample's clustering assignment.
 
+        In addition to fitting, this method also returns the result of the
+        clustering assignment for each sample in the training set.
+
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features) or \
@@ -1145,6 +1146,8 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
 
     See Also
     --------
+    AgglomerativeClustering : Agglomerative clustering of samples instead of
+        features.
     ward_tree : Hierarchical clustering with ward linkage.
 
     Examples
diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 827dcc72cc804..457a83dd41e71 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -56,7 +56,7 @@ def transform(self, X):
 
     def inverse_transform(self, Xred):
         """
-        Inverse the transformation and return a vector of size nb_features.
+        Inverse the transformation and return a vector of size `n_features`.
 
         Parameters
         ----------