diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
index 832a4e4389b19..f27ef98dfcdb0 100644
--- a/doc/whats_new/v1.2.rst
+++ b/doc/whats_new/v1.2.rst
@@ -91,6 +91,10 @@ Changelog
   `eigen_tol="auto"` in version 1.3.
   :pr:`23210` by :user:`Meekail Zain <micky774>`.
 
+- |API| The `affinity` parameter of :class:`cluster.AgglomerativeClustering` and
+  :class:`cluster.FeatureAgglomeration` is deprecated and will be renamed to
+  `metric` in v1.4. :pr:`23470` by :user:`Meekail Zain <micky774>`.
+
 :mod:`sklearn.datasets`
 .......................
 
diff --git a/examples/cluster/plot_agglomerative_clustering_metrics.py b/examples/cluster/plot_agglomerative_clustering_metrics.py
index e022035ca2207..38fd3682d48ec 100644
--- a/examples/cluster/plot_agglomerative_clustering_metrics.py
+++ b/examples/cluster/plot_agglomerative_clustering_metrics.py
@@ -125,7 +125,7 @@ def sqr(x):
 # Plot clustering results
 for index, metric in enumerate(["cosine", "euclidean", "cityblock"]):
     model = AgglomerativeClustering(
-        n_clusters=n_clusters, linkage="average", affinity=metric
+        n_clusters=n_clusters, linkage="average", metric=metric
     )
     model.fit(X)
     plt.figure()
@@ -134,7 +134,7 @@ def sqr(x):
         plt.plot(X[model.labels_ == l].T, c=c, alpha=0.5)
     plt.axis("tight")
     plt.axis("off")
-    plt.suptitle("AgglomerativeClustering(affinity=%s)" % metric, size=20)
+    plt.suptitle("AgglomerativeClustering(metric=%s)" % metric, size=20)
 
 
 plt.show()
diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py
index 8b52759c79018..a9e39267411b7 100644
--- a/examples/cluster/plot_cluster_comparison.py
+++ b/examples/cluster/plot_cluster_comparison.py
@@ -171,7 +171,7 @@
     )
     average_linkage = cluster.AgglomerativeClustering(
         linkage="average",
-        affinity="cityblock",
+        metric="cityblock",
         n_clusters=params["n_clusters"],
         connectivity=connectivity,
     )
diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index d7de31d41b325..90ee3336e0478 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -23,7 +23,7 @@
 from ..utils import check_array
 from ..utils._fast_dict import IntFloatDict
 from ..utils.graph import _fix_connected_components
-from ..utils._param_validation import Interval, StrOptions
+from ..utils._param_validation import Hidden, Interval, StrOptions
 from ..utils.validation import check_memory
 
 # mypy error: Module 'sklearn.cluster' has no attribute '_hierarchical_fast'
@@ -760,6 +760,19 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
         If "precomputed", a distance matrix (instead of a similarity matrix)
         is needed as input for the fit method.
 
+        .. deprecated:: 1.2
+            `affinity` was deprecated in version 1.2 and will be renamed to
+            `metric` in 1.4.
+
+    metric : str or callable, default=None
+        Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
+        "manhattan", "cosine", or "precomputed". If set to `None` then
+        "euclidean" is used. If linkage is "ward", only "euclidean" is
+        accepted. If "precomputed", a distance matrix is needed as input for
+        the fit method.
+
+        .. versionadded:: 1.2
+
     memory : str or object with the joblib.Memory interface, default=None
         Used to cache the output of the computation of the tree.
         By default, no caching is done. If a string is given, it is the
@@ -880,9 +893,15 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
     _parameter_constraints = {
         "n_clusters": [Interval(Integral, 1, None, closed="left"), None],
         "affinity": [
+            Hidden(StrOptions({"deprecated"})),
             StrOptions(set(_VALID_METRICS) | {"precomputed"}),
             callable,
         ],
+        "metric": [
+            StrOptions(set(_VALID_METRICS) | {"precomputed"}),
+            callable,
+            None,
+        ],
         "memory": "no_validation",  # TODO
         "connectivity": ["array-like", callable, None],
         "compute_full_tree": [StrOptions({"auto"}), "boolean"],
@@ -895,7 +914,8 @@ def __init__(
         self,
         n_clusters=2,
         *,
-        affinity="euclidean",
+        affinity="deprecated",  # TODO(1.4): Remove
+        metric=None,  # TODO(1.4): Set to "euclidean"
         memory=None,
         connectivity=None,
         compute_full_tree="auto",
@@ -910,6 +930,7 @@ def __init__(
         self.compute_full_tree = compute_full_tree
         self.linkage = linkage
         self.affinity = affinity
+        self.metric = metric
         self.compute_distances = compute_distances
 
     def fit(self, X, y=None):
@@ -920,7 +941,7 @@ def fit(self, X, y=None):
         X : array-like, shape (n_samples, n_features) or \
                 (n_samples, n_samples)
             Training instances to cluster, or distances between instances if
-            ``affinity='precomputed'``.
+            ``metric='precomputed'``.
 
         y : Ignored
             Not used, present here for API consistency by convention.
@@ -950,6 +971,24 @@ def _fit(self, X):
         """
         memory = check_memory(self.memory)
 
+        self._metric = self.metric
+        # TODO(1.4): Remove
+        if self.affinity != "deprecated":
+            if self.metric is not None:
+                raise ValueError(
+                    "Both `affinity` and `metric` attributes were set. Attribute"
+                    " `affinity` was deprecated in version 1.2 and will be removed in"
+                    " 1.4. To avoid this error, only set the `metric` attribute."
+                )
+            warnings.warn(
+                "Attribute `affinity` was deprecated in version 1.2 and will be removed"
+                " in 1.4. Use `metric` instead",
+                FutureWarning,
+            )
+            self._metric = self.affinity
+        elif self.metric is None:
+            self._metric = "euclidean"
+
         if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):
             raise ValueError(
                 "Exactly one of n_clusters and "
@@ -962,10 +1001,10 @@ def _fit(self, X):
                 "compute_full_tree must be True if distance_threshold is set."
             )
 
-        if self.linkage == "ward" and self.affinity != "euclidean":
+        if self.linkage == "ward" and self._metric != "euclidean":
             raise ValueError(
-                "%s was provided as affinity. Ward can only "
-                "work with euclidean distances." % (self.affinity,)
+                f"{self._metric} was provided as metric. Ward can only "
+                "work with euclidean distances."
             )
 
         tree_builder = _TREE_BUILDERS[self.linkage]
@@ -998,7 +1037,7 @@ def _fit(self, X):
         kwargs = {}
         if self.linkage != "ward":
             kwargs["linkage"] = self.linkage
-            kwargs["affinity"] = self.affinity
+            kwargs["affinity"] = self._metric
 
         distance_threshold = self.distance_threshold
 
@@ -1084,6 +1123,19 @@ class FeatureAgglomeration(
         If "precomputed", a distance matrix (instead of a similarity matrix)
         is needed as input for the fit method.
 
+        .. deprecated:: 1.2
+            `affinity` was deprecated in version 1.2 and will be renamed to
+            `metric` in 1.4.
+
+    metric : str or callable, default=None
+        Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
+        "manhattan", "cosine", or "precomputed". If set to `None` then
+        "euclidean" is used. If linkage is "ward", only "euclidean" is
+        accepted. If "precomputed", a distance matrix is needed as input for
+        the fit method.
+
+        .. versionadded:: 1.2
+
     memory : str or object with the joblib.Memory interface, default=None
         Used to cache the output of the computation of the tree.
         By default, no caching is done. If a string is given, it is the
@@ -1208,8 +1260,14 @@ class FeatureAgglomeration(
     _parameter_constraints = {
         "n_clusters": [Interval(Integral, 1, None, closed="left"), None],
         "affinity": [
+            Hidden(StrOptions({"deprecated"})),
+            StrOptions(set(_VALID_METRICS) | {"precomputed"}),
+            callable,
+        ],
+        "metric": [
             StrOptions(set(_VALID_METRICS) | {"precomputed"}),
             callable,
+            None,
         ],
         "memory": "no_validation",  # TODO
         "connectivity": ["array-like", callable, None],
@@ -1224,7 +1282,8 @@ def __init__(
         self,
         n_clusters=2,
         *,
-        affinity="euclidean",
+        affinity="deprecated",  # TODO(1.4): Remove
+        metric=None,  # TODO(1.4): Set to "euclidean"
         memory=None,
         connectivity=None,
         compute_full_tree="auto",
@@ -1240,6 +1299,7 @@ def __init__(
             compute_full_tree=compute_full_tree,
             linkage=linkage,
             affinity=affinity,
+            metric=metric,
             distance_threshold=distance_threshold,
             compute_distances=compute_distances,
         )
diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py
index 3cb5e2bb2b067..012073d1ba332 100644
--- a/sklearn/cluster/tests/test_hierarchical.py
+++ b/sklearn/cluster/tests/test_hierarchical.py
@@ -241,24 +241,24 @@ def test_agglomerative_clustering():
     clustering = AgglomerativeClustering(
         n_clusters=10,
         connectivity=connectivity.toarray(),
-        affinity="manhattan",
+        metric="manhattan",
         linkage="ward",
     )
     with pytest.raises(ValueError):
         clustering.fit(X)
 
     # Test using another metric than euclidean works with linkage complete
-    for affinity in PAIRED_DISTANCES.keys():
+    for metric in PAIRED_DISTANCES.keys():
         # Compare our (structured) implementation to scipy
         clustering = AgglomerativeClustering(
             n_clusters=10,
             connectivity=np.ones((n_samples, n_samples)),
-            affinity=affinity,
+            metric=metric,
             linkage="complete",
         )
         clustering.fit(X)
         clustering2 = AgglomerativeClustering(
-            n_clusters=10, connectivity=None, affinity=affinity, linkage="complete"
+            n_clusters=10, connectivity=None, metric=metric, linkage="complete"
         )
         clustering2.fit(X)
         assert_almost_equal(
@@ -275,7 +275,7 @@ def test_agglomerative_clustering():
     clustering2 = AgglomerativeClustering(
         n_clusters=10,
         connectivity=connectivity,
-        affinity="precomputed",
+        metric="precomputed",
         linkage="complete",
     )
     clustering2.fit(X_dist)
@@ -289,7 +289,7 @@ def test_agglomerative_clustering_memory_mapped():
     """
     rng = np.random.RandomState(0)
     Xmm = create_memmap_backed_data(rng.randn(50, 100))
-    AgglomerativeClustering(affinity="euclidean", linkage="single").fit(Xmm)
+    AgglomerativeClustering(metric="euclidean", linkage="single").fit(Xmm)
 
 
 def test_ward_agglomeration():
@@ -860,7 +860,7 @@ def test_invalid_shape_precomputed_dist_matrix():
         ValueError,
         match=r"Distance matrix should be square, got matrix of shape \(5, 3\)",
     ):
-        AgglomerativeClustering(affinity="precomputed", linkage="complete").fit(X)
+        AgglomerativeClustering(metric="precomputed", linkage="complete").fit(X)
 
 
 def test_precomputed_connectivity_affinity_with_2_connected_components():
@@ -900,3 +900,26 @@ def test_precomputed_connectivity_affinity_with_2_connected_components():
 
     assert_array_equal(clusterer.labels_, clusterer_precomputed.labels_)
     assert_array_equal(clusterer.children_, clusterer_precomputed.children_)
+
+
+# TODO(1.4): Remove
+def test_deprecate_affinity():
+    """Check `affinity` warns when used alone and errors alongside `metric`."""
+    data = np.random.RandomState(42).randn(50, 10)
+
+    warn_msg = (
+        "Attribute `affinity` was deprecated in version 1.2 and will be removed in 1.4."
+        " Use `metric` instead"
+    )
+    deprecated_only = AgglomerativeClustering(affinity="euclidean")
+    with pytest.warns(FutureWarning, match=warn_msg):
+        deprecated_only.fit(data)
+    with pytest.warns(FutureWarning, match=warn_msg):
+        deprecated_only.fit_predict(data)
+
+    err_msg = "Both `affinity` and `metric` attributes were set. Attribute"
+    both_set = AgglomerativeClustering(metric="euclidean", affinity="euclidean")
+    with pytest.raises(ValueError, match=err_msg):
+        both_set.fit(data)
+    with pytest.raises(ValueError, match=err_msg):
+        both_set.fit_predict(data)