From dc822bc1212ddedd795ecc031fd35607dfaffbea Mon Sep 17 00:00:00 2001
From: Meekail Zain <zainmeekail@gmail.com>
Date: Fri, 8 Jul 2022 13:57:36 -0400
Subject: [PATCH 1/9] Fixed implementation

---
 sklearn/covariance/_shrunk_covariance.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index 64fce5b1db6f6..b2aeaf79427f8 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -535,17 +535,19 @@ def oas(X, *, assume_centered=False):
     else:
         n_samples, n_features = X.shape
 
-    emp_cov = empirical_covariance(X, assume_centered=assume_centered)
-    mu = np.trace(emp_cov) / n_features
-
+    # Resolves differences with OAS when using uncentered data
+    if not assume_centered:
+        X = X - X.mean(0)
+    emp_cov = empirical_covariance(X, assume_centered=True)
+    mu = np.trace(emp_cov)
     # formula from Chen et al.'s **implementation**
-    alpha = np.mean(emp_cov**2)
+    alpha = np.trace(emp_cov**2)
     num = alpha + mu**2
     den = (n_samples + 1.0) * (alpha - (mu**2) / n_features)
 
-    shrinkage = 1.0 if den == 0 else min(num / den, 1.0)
+    shrinkage = 1.0 if den == 0 else max(0, min(num / den, 1.0))
     shrunk_cov = (1.0 - shrinkage) * emp_cov
-    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu
+    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu / n_features
 
     return shrunk_cov, shrinkage
 
@@ -644,13 +646,13 @@ class OAS(EmpiricalCovariance):
     ...                             size=500)
     >>> oas = OAS().fit(X)
     >>> oas.covariance_
-    array([[0.7533..., 0.2763...],
-           [0.2763..., 0.3964...]])
+    array([[0.7456..., 0.2644...],
+           [0.2644..., 0.4041...]])
     >>> oas.precision_
-    array([[ 1.7833..., -1.2431... ],
-           [-1.2431...,  3.3889...]])
+    array([[ 1.7463..., -1.1428...],
+           [-1.1428...,  3.2224...]])
     >>> oas.shrinkage_
-    0.0195...
+    0.0617...
     """
 
     def fit(self, X, y=None):

From 843e08c0dde1082be1e99664533641f79fd16659 Mon Sep 17 00:00:00 2001
From: Meekail Zain <zainmeekail@gmail.com>
Date: Wed, 20 Jul 2022 13:05:55 -0400
Subject: [PATCH 2/9] Updated changelog

---
 doc/whats_new/v1.2.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
index 7a5e92f5f960d..fb8cebe30f5d9 100644
--- a/doc/whats_new/v1.2.rst
+++ b/doc/whats_new/v1.2.rst
@@ -36,6 +36,11 @@ random sampling procedures.
   to a tiny value. Moreover, `verbose` is now properly propagated to L-BFGS-B.
   :pr:`23619` by :user:`Christian Lorentzen <lorentzenchr>`.
 
+- |Fix| :class:`covariance.OAS` and :func:`covariance.oas` now use the correct
+  formula for calculating the shrinkage parameter, and hence may produce
+  different results when run on the same data.
+  :pr:`23867` by :user:`Meekail Zain <micky774>`
+
 Changes impacting all modules
 -----------------------------
 
@@ -282,6 +287,14 @@ Changelog
   :pr:`10805` by :user:`Mathias Andersen <MrMathias>` and
   :pr:`23471` by :user:`Meekail Zain <micky774>`
 
+:mod:`sklearn.covariance`
+.........................
+
+- |Fix| :class:`covariance.OAS` and :func:`covariance.oas` now use the correct
+  formula for calculating the shrinkage parameter, and hence may produce
+  different results when run on the same data.
+  :pr:`23867` by :user:`Meekail Zain <micky774>`
+
 Code and Documentation Contributors
 -----------------------------------
 

From b37dae03381aab858b74acfa68d9b91cecf9f441 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 29 Dec 2022 17:43:05 +0100
Subject: [PATCH 3/9] remove useless diff

---
 sklearn/covariance/_shrunk_covariance.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index 2d96e22892243..e869f2c6b5203 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -565,7 +565,6 @@ def oas(X, *, assume_centered=False):
     The formula we used to implement the OAS is slightly modified compared
     to the one given in the article. See :class:`OAS` for more details.
     """
-
     estimator = OAS(
         assume_centered=assume_centered,
     ).fit(X)

From 2a1ecca193d799071ab2d65ce0f45fd5538cb582 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 29 Dec 2022 17:45:03 +0100
Subject: [PATCH 4/9] revert centering

---
 sklearn/covariance/_shrunk_covariance.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index e869f2c6b5203..d45a9530a8e5a 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -53,9 +53,7 @@ def _oas(X, *, assume_centered=False):
 
     n_samples, n_features = X.shape
 
-    if not assume_centered:
-        X = X - X.mean(axis=0)
-    emp_cov = empirical_covariance(X, assume_centered=True)
+    emp_cov = empirical_covariance(X, assume_centered=assume_centered)
     mu = np.trace(emp_cov)
 
     # formula from Chen et al.'s **implementation**

From 61ab97d6e946e29d9d7ea05d86b4c59721140246 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 29 Dec 2022 19:00:47 +0100
Subject: [PATCH 5/9] DOC add documentation around with original article

---
 sklearn/covariance/_shrunk_covariance.py | 92 +++++++++++++++---------
 1 file changed, 60 insertions(+), 32 deletions(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index d45a9530a8e5a..fcaa30717b9da 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -44,9 +44,16 @@ def _ledoit_wolf(X, *, assume_centered, block_size):
 
 
 def _oas(X, *, assume_centered=False):
-    """Estimate covariance with the Oracle Approximating Shrinkage algorithm."""
-    # for only one feature, the result is the same whatever the shrinkage
+    """Estimate covariance with the Oracle Approximating Shrinkage algorithm.
+
+    The formulation is based on [1]_.
+    [1] "Shrinkage algorithms for MMSE covariance estimation.",
+        Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O.
+        IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010.
+        https://arxiv.org/pdf/0907.4698.pdf
+    """
     if len(X.shape) == 2 and X.shape[1] == 1:
+        # for only one feature, the result is the same whatever the shrinkage
         if not assume_centered:
             X = X - X.mean()
         return np.atleast_2d((X**2).mean()), 0.0
@@ -54,16 +61,26 @@ def _oas(X, *, assume_centered=False):
     n_samples, n_features = X.shape
 
     emp_cov = empirical_covariance(X, assume_centered=assume_centered)
-    mu = np.trace(emp_cov)
-
-    # formula from Chen et al.'s **implementation**
-    alpha = np.trace(emp_cov**2)
-    num = alpha + mu**2
-    den = (n_samples + 1.0) * (alpha - (mu**2) / n_features)
-
+    trace_emp_cov = np.trace(emp_cov)
+
+    # The shrinkage is defined as:
+    # shrinkage = min(
+    # trace(S**2) + trace(S)**2) / ((n + 1) (trace(S**2) - trace(S)**2 / p), 1
+    # )
+    # where n and p are n_samples and n_features, respectively (cf. Eq. 23 in [1]).
+    # The factor 2 / p is omitted since it does not value of the estimator for large p.
+    trace_squared_emp_cov = trace_emp_cov**2
+    trace_emp_cov_squared = np.trace(emp_cov**2)
+    num = trace_emp_cov_squared + trace_squared_emp_cov
+    den = (n_samples + 1) * (trace_emp_cov_squared - trace_squared_emp_cov / n_features)
     shrinkage = 1.0 if den == 0 else max(0, min(num / den, 1.0))
+
+    # The shrunk covariance is defined as:
+    # (1 - shrinkage) * S + shrinkage * F (cf. Eq. 4 in [1])
+    # where S is the empirical covariance and F is the shrinkage target defined as
+    # F = trace(S) / n_features * np.identity(n_features) (cf. Eq. 3 in [1])
     shrunk_cov = (1.0 - shrinkage) * emp_cov
-    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu / n_features
+    shrunk_cov.flat[:: n_features + 1] += shrinkage * trace_emp_cov / n_features
 
     return shrunk_cov, shrinkage
 
@@ -530,7 +547,9 @@ def fit(self, X, y=None):
 # OAS estimator
 @validate_params({"X": ["array-like"]})
 def oas(X, *, assume_centered=False):
-    """Estimate covariance with the Oracle Approximating Shrinkage algorithm.
+    """Estimate covariance with the Oracle Approximating Shrinkage as proposed in [1]_.
+
+    Read more in the :ref:`User Guide <shrunk_covariance>`.
 
     Parameters
     ----------
@@ -554,14 +573,25 @@ def oas(X, *, assume_centered=False):
 
     Notes
     -----
-    The regularised (shrunk) covariance is:
+    The regularised covariance is:
 
-    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
+    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features),
 
-    where mu = trace(cov) / n_features
+    where mu = trace(cov) / n_features and shrinkage is given by the OAS formula
+    (see [1]_).
+
+    The shrinkage formulation implemented here differs from Eq. 23 in [1]_. In
+    the original article, formula (23) states that 2/p (p being the number of
+    features) is multiplied by Trace(cov*cov) in both the numerator and
+    denominator, but this operation is omitted because for a large p, the value
+    of 2/p is so small that it doesn't affect the value of the estimator.
 
-    The formula we used to implement the OAS is slightly modified compared
-    to the one given in the article. See :class:`OAS` for more details.
+    References
+    ----------
+    .. [1] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.",
+           Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O.
+           IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010.
+           <0907.4698>`
     """
     estimator = OAS(
         assume_centered=assume_centered,
@@ -570,20 +600,10 @@ def oas(X, *, assume_centered=False):
 
 
 class OAS(EmpiricalCovariance):
-    """Oracle Approximating Shrinkage Estimator.
+    """Oracle Approximating Shrinkage Estimator as proposed in [1]_.
 
     Read more in the :ref:`User Guide <shrunk_covariance>`.
 
-    OAS is a particular form of shrinkage described in
-    "Shrinkage Algorithms for MMSE Covariance Estimation"
-    Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.
-
-    The formula used here does not correspond to the one given in the
-    article. In the original article, formula (23) states that 2/p is
-    multiplied by Trace(cov*cov) in both the numerator and denominator, but
-    this operation is omitted because for a large p, the value of 2/p is
-    so small that it doesn't affect the value of the estimator.
-
     Parameters
     ----------
     store_precision : bool, default=True
@@ -640,15 +660,23 @@ class OAS(EmpiricalCovariance):
     -----
     The regularised covariance is:
 
-    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
+    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features),
 
-    where mu = trace(cov) / n_features
-    and shrinkage is given by the OAS formula (see References)
+    where mu = trace(cov) / n_features and shrinkage is given by the OAS formula
+    (see [1]_).
+
+    The shrinkage formulation implemented here differs from Eq. 23 in [1]_. In
+    the original article, formula (23) states that 2/p (p being the number of
+    features) is multiplied by Trace(cov*cov) in both the numerator and
+    denominator, but this operation is omitted because for a large p, the value
+    of 2/p is so small that it doesn't affect the value of the estimator.
 
     References
     ----------
-    "Shrinkage Algorithms for MMSE Covariance Estimation"
-    Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.
+    .. [1] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.",
+           Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O.
+           IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010.
+           <0907.4698>`
 
     Examples
     --------

From a1ff4f6d3cb9f9b7092295d45a70eed70d85dacc Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 29 Dec 2022 19:02:20 +0100
Subject: [PATCH 6/9] typo

---
 sklearn/covariance/_shrunk_covariance.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index fcaa30717b9da..31b6264f6d15e 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -68,7 +68,8 @@ def _oas(X, *, assume_centered=False):
     # trace(S**2) + trace(S)**2) / ((n + 1) (trace(S**2) - trace(S)**2 / p), 1
     # )
     # where n and p are n_samples and n_features, respectively (cf. Eq. 23 in [1]).
-    # The factor 2 / p is omitted since it does not value of the estimator for large p.
+    # The factor 2 / p is omitted since it does not impact the value of the estimator
+    # for large p.
     trace_squared_emp_cov = trace_emp_cov**2
     trace_emp_cov_squared = np.trace(emp_cov**2)
     num = trace_emp_cov_squared + trace_squared_emp_cov

From 94cb4f1ee665eba50f417e795a7f8fbe30196ada Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 29 Dec 2022 19:49:05 +0100
Subject: [PATCH 7/9] DOC update ref in user guide

---
 doc/modules/covariance.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst
index c97676ea62108..50927f9a677f6 100644
--- a/doc/modules/covariance.rst
+++ b/doc/modules/covariance.rst
@@ -160,8 +160,10 @@ object to the same sample.
 
 .. topic:: References:
 
-    .. [2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation",
-           IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.
+    .. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.",
+           Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O.
+           IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010.
+           <0907.4698>`
 
 .. topic:: Examples:
 

From 1ae4577e73311c4fddece6f3cd6c638407fa3f75 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 1 Feb 2023 17:36:23 +0100
Subject: [PATCH 8/9] revert implementation and add comment

---
 sklearn/covariance/_shrunk_covariance.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index 31b6264f6d15e..1a3f05059e098 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -61,27 +61,35 @@ def _oas(X, *, assume_centered=False):
     n_samples, n_features = X.shape
 
     emp_cov = empirical_covariance(X, assume_centered=assume_centered)
-    trace_emp_cov = np.trace(emp_cov)
 
     # The shrinkage is defined as:
     # shrinkage = min(
-    # trace(S**2) + trace(S)**2) / ((n + 1) (trace(S**2) - trace(S)**2 / p), 1
+    # (trace(S @ S.T) + trace(S)**2) / ((n + 1) * (trace(S @ S.T) - trace(S)**2 / p)), 1
     # )
     # where n and p are n_samples and n_features, respectively (cf. Eq. 23 in [1]).
     # The factor 2 / p is omitted since it does not impact the value of the estimator
     # for large p.
-    trace_squared_emp_cov = trace_emp_cov**2
-    trace_emp_cov_squared = np.trace(emp_cov**2)
-    num = trace_emp_cov_squared + trace_squared_emp_cov
-    den = (n_samples + 1) * (trace_emp_cov_squared - trace_squared_emp_cov / n_features)
-    shrinkage = 1.0 if den == 0 else max(0, min(num / den, 1.0))
+
+    # Instead of computing trace(S @ S.T), we can compute the average of the squared
+    # elements of S, which is equal to trace(S @ S.T) / p**2.
+    # See the definition of the Frobenius norm:
+    # https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm
+    alpha = np.mean(emp_cov**2)
+    mu = np.trace(emp_cov) / n_features
+    mu_squared = mu**2
+
+    # The factor 1 / p**2 will cancel out since it is in both the numerator and
+    # denominator
+    num = alpha + mu_squared
+    den = (n_samples + 1) * (alpha - mu_squared / n_features)
+    shrinkage = 1.0 if den == 0 else min(num / den, 1.0)
 
     # The shrunk covariance is defined as:
     # (1 - shrinkage) * S + shrinkage * F (cf. Eq. 4 in [1])
     # where S is the empirical covariance and F is the shrinkage target defined as
     # F = trace(S) / n_features * np.identity(n_features) (cf. Eq. 3 in [1])
     shrunk_cov = (1.0 - shrinkage) * emp_cov
-    shrunk_cov.flat[:: n_features + 1] += shrinkage * trace_emp_cov / n_features
+    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu
 
     return shrunk_cov, shrinkage
 

From e5c0b5d8815dd6821b50aea6dba84005ca3d18b6 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 1 Feb 2023 17:40:50 +0100
Subject: [PATCH 9/9] revert doc

---
 sklearn/covariance/_shrunk_covariance.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index dbfa8b8e2d6cc..5cdc9f3d212ad 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -706,13 +706,13 @@ class OAS(EmpiricalCovariance):
     ...                             size=500)
     >>> oas = OAS().fit(X)
     >>> oas.covariance_
-    array([[0.7456..., 0.2644...],
-           [0.2644..., 0.4041...]])
+    array([[0.7533..., 0.2763...],
+           [0.2763..., 0.3964...]])
     >>> oas.precision_
-    array([[ 1.7463..., -1.1428...],
-           [-1.1428...,  3.2224...]])
+    array([[ 1.7833..., -1.2431... ],
+           [-1.2431...,  3.3889...]])
     >>> oas.shrinkage_
-    0.0617...
+    0.0195...
     """
 
     def fit(self, X, y=None):