From dc822bc1212ddedd795ecc031fd35607dfaffbea Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Fri, 8 Jul 2022 13:57:36 -0400 Subject: [PATCH 1/9] Fixed implementation --- sklearn/covariance/_shrunk_covariance.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 64fce5b1db6f6..b2aeaf79427f8 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -535,17 +535,19 @@ def oas(X, *, assume_centered=False): else: n_samples, n_features = X.shape - emp_cov = empirical_covariance(X, assume_centered=assume_centered) - mu = np.trace(emp_cov) / n_features - + # Resolves differences with OAS when using uncentered data + if not assume_centered: + X = X - X.mean(0) + emp_cov = empirical_covariance(X, assume_centered=True) + mu = np.trace(emp_cov) # formula from Chen et al.'s **implementation** - alpha = np.mean(emp_cov**2) + alpha = np.trace(emp_cov**2) num = alpha + mu**2 den = (n_samples + 1.0) * (alpha - (mu**2) / n_features) - shrinkage = 1.0 if den == 0 else min(num / den, 1.0) + shrinkage = 1.0 if den == 0 else max(0, min(num / den, 1.0)) shrunk_cov = (1.0 - shrinkage) * emp_cov - shrunk_cov.flat[:: n_features + 1] += shrinkage * mu + shrunk_cov.flat[:: n_features + 1] += shrinkage * mu / n_features return shrunk_cov, shrinkage @@ -644,13 +646,13 @@ class OAS(EmpiricalCovariance): ... size=500) >>> oas = OAS().fit(X) >>> oas.covariance_ - array([[0.7533..., 0.2763...], - [0.2763..., 0.3964...]]) + array([[0.7456..., 0.2644...], + [0.2644..., 0.4041...]]) >>> oas.precision_ - array([[ 1.7833..., -1.2431... ], - [-1.2431..., 3.3889...]]) + array([[ 1.7463..., -1.1428...], + [-1.1428..., 3.2224...]]) >>> oas.shrinkage_ - 0.0195... + 0.0617... 
""" def fit(self, X, y=None): From 843e08c0dde1082be1e99664533641f79fd16659 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Wed, 20 Jul 2022 13:05:55 -0400 Subject: [PATCH 2/9] Updated changelog --- doc/whats_new/v1.2.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 7a5e92f5f960d..fb8cebe30f5d9 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -36,6 +36,11 @@ random sampling procedures. to a tiny value. Moreover, `verbose` is now properly propagated to L-BFGS-B. :pr:`23619` by :user:`Christian Lorentzen `. +- |Fix| :class:`covariance.OAS` and :func:`covariance.oas` now use the correct + formula for calculating the shrinkage parameter, and hence may produce + different results when run on the same data. + :pr:`23867` by :user:`Meekail Zain ` + Changes impacting all modules ----------------------------- @@ -282,6 +287,14 @@ Changelog :pr:`10805` by :user:`Mathias Andersen ` and :pr:`23471` by :user:`Meekail Zain ` +:mod:`sklearn.covariance` +......................... + +- |Fix| :class:`covariance.OAS` and :func:`covariance.oas` now use the correct + formula for calculating the shrinkage parameter, and hence may produce + different results when run on the same data. 
+ :pr:`23867` by :user:`Meekail Zain ` + Code and Documentation Contributors ----------------------------------- From b37dae03381aab858b74acfa68d9b91cecf9f441 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 29 Dec 2022 17:43:05 +0100 Subject: [PATCH 3/9] remove useless diff --- sklearn/covariance/_shrunk_covariance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 2d96e22892243..e869f2c6b5203 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -565,7 +565,6 @@ def oas(X, *, assume_centered=False): The formula we used to implement the OAS is slightly modified compared to the one given in the article. See :class:`OAS` for more details. """ - estimator = OAS( assume_centered=assume_centered, ).fit(X) From 2a1ecca193d799071ab2d65ce0f45fd5538cb582 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 29 Dec 2022 17:45:03 +0100 Subject: [PATCH 4/9] revert centering --- sklearn/covariance/_shrunk_covariance.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index e869f2c6b5203..d45a9530a8e5a 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -53,9 +53,7 @@ def _oas(X, *, assume_centered=False): n_samples, n_features = X.shape - if not assume_centered: - X = X - X.mean(axis=0) - emp_cov = empirical_covariance(X, assume_centered=True) + emp_cov = empirical_covariance(X, assume_centered=assume_centered) mu = np.trace(emp_cov) # formula from Chen et al.'s **implementation** From 61ab97d6e946e29d9d7ea05d86b4c59721140246 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 29 Dec 2022 19:00:47 +0100 Subject: [PATCH 5/9] DOC add documentation around with original article --- sklearn/covariance/_shrunk_covariance.py | 92 +++++++++++++++--------- 1 file 
changed, 60 insertions(+), 32 deletions(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index d45a9530a8e5a..fcaa30717b9da 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -44,9 +44,16 @@ def _ledoit_wolf(X, *, assume_centered, block_size): def _oas(X, *, assume_centered=False): - """Estimate covariance with the Oracle Approximating Shrinkage algorithm.""" - # for only one feature, the result is the same whatever the shrinkage + """Estimate covariance with the Oracle Approximating Shrinkage algorithm. + + The formulation is based on [1]_. + [1] "Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + https://arxiv.org/pdf/0907.4698.pdf + """ if len(X.shape) == 2 and X.shape[1] == 1: + # for only one feature, the result is the same whatever the shrinkage if not assume_centered: X = X - X.mean() return np.atleast_2d((X**2).mean()), 0.0 @@ -54,16 +61,26 @@ def _oas(X, *, assume_centered=False): n_samples, n_features = X.shape emp_cov = empirical_covariance(X, assume_centered=assume_centered) - mu = np.trace(emp_cov) - - # formula from Chen et al.'s **implementation** - alpha = np.trace(emp_cov**2) - num = alpha + mu**2 - den = (n_samples + 1.0) * (alpha - (mu**2) / n_features) - + trace_emp_cov = np.trace(emp_cov) + + # The shrinkage is defined as: + # shrinkage = min( + # trace(S**2) + trace(S)**2) / ((n + 1) (trace(S**2) - trace(S)**2 / p), 1 + # ) + # where n and p are n_samples and n_features, respectively (cf. Eq. 23 in [1]). + # The factor 2 / p is omitted since it does not value of the estimator for large p. 
+ trace_squared_emp_cov = trace_emp_cov**2 + trace_emp_cov_squared = np.trace(emp_cov**2) + num = trace_emp_cov_squared + trace_squared_emp_cov + den = (n_samples + 1) * (trace_emp_cov_squared - trace_squared_emp_cov / n_features) shrinkage = 1.0 if den == 0 else max(0, min(num / den, 1.0)) + + # The shrunk covariance is defined as: + # (1 - shrinkage) * S + shrinkage * F (cf. Eq. 4 in [1]) + # where S is the empirical covariance and F is the shrinkage target defined as + # F = trace(S) / n_features * np.identity(n_features) (cf. Eq. 3 in [1]) shrunk_cov = (1.0 - shrinkage) * emp_cov - shrunk_cov.flat[:: n_features + 1] += shrinkage * mu / n_features + shrunk_cov.flat[:: n_features + 1] += shrinkage * trace_emp_cov / n_features return shrunk_cov, shrinkage @@ -530,7 +547,9 @@ def fit(self, X, y=None): # OAS estimator @validate_params({"X": ["array-like"]}) def oas(X, *, assume_centered=False): - """Estimate covariance with the Oracle Approximating Shrinkage algorithm. + """Estimate covariance with the Oracle Approximating Shrinkage as proposed in [1]_. + + Read more in the :ref:`User Guide `. Parameters ---------- @@ -554,14 +573,25 @@ def oas(X, *, assume_centered=False): Notes ----- - The regularised (shrunk) covariance is: + The regularised covariance is: - (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features) + (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features), - where mu = trace(cov) / n_features + where mu = trace(cov) / n_features and shrinkage is given by the OAS formula + (see [1]_). + + The shrinkage formulation implemented here differs from Eq. 23 in [1]_. In + the original article, formula (23) states that 2/p (p being the number of + features) is multiplied by Trace(cov*cov) in both the numerator and + denominator, but this operation is omitted because for a large p, the value + of 2/p is so small that it doesn't affect the value of the estimator. 
- The formula we used to implement the OAS is slightly modified compared - to the one given in the article. See :class:`OAS` for more details. + References + ---------- + .. [1] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + <0907.4698>` """ estimator = OAS( assume_centered=assume_centered, @@ -570,20 +600,10 @@ def oas(X, *, assume_centered=False): class OAS(EmpiricalCovariance): - """Oracle Approximating Shrinkage Estimator. + """Oracle Approximating Shrinkage Estimator as proposed in [1]_. Read more in the :ref:`User Guide `. - OAS is a particular form of shrinkage described in - "Shrinkage Algorithms for MMSE Covariance Estimation" - Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. - - The formula used here does not correspond to the one given in the - article. In the original article, formula (23) states that 2/p is - multiplied by Trace(cov*cov) in both the numerator and denominator, but - this operation is omitted because for a large p, the value of 2/p is - so small that it doesn't affect the value of the estimator. - Parameters ---------- store_precision : bool, default=True @@ -640,15 +660,23 @@ class OAS(EmpiricalCovariance): ----- The regularised covariance is: - (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features) + (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features), - where mu = trace(cov) / n_features - and shrinkage is given by the OAS formula (see References) + where mu = trace(cov) / n_features and shrinkage is given by the OAS formula + (see [1]_). + + The shrinkage formulation implemented here differs from Eq. 23 in [1]_. 
In + the original article, formula (23) states that 2/p (p being the number of + features) is multiplied by Trace(cov*cov) in both the numerator and + denominator, but this operation is omitted because for a large p, the value + of 2/p is so small that it doesn't affect the value of the estimator. References ---------- - "Shrinkage Algorithms for MMSE Covariance Estimation" - Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. + .. [1] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + <0907.4698>` Examples -------- From a1ff4f6d3cb9f9b7092295d45a70eed70d85dacc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 29 Dec 2022 19:02:20 +0100 Subject: [PATCH 6/9] typo --- sklearn/covariance/_shrunk_covariance.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index fcaa30717b9da..31b6264f6d15e 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -68,7 +68,8 @@ def _oas(X, *, assume_centered=False): # trace(S**2) + trace(S)**2) / ((n + 1) (trace(S**2) - trace(S)**2 / p), 1 # ) # where n and p are n_samples and n_features, respectively (cf. Eq. 23 in [1]). - # The factor 2 / p is omitted since it does not value of the estimator for large p. + # The factor 2 / p is omitted since it does not impact the value of the estimator + # for large p. 
trace_squared_emp_cov = trace_emp_cov**2 trace_emp_cov_squared = np.trace(emp_cov**2) num = trace_emp_cov_squared + trace_squared_emp_cov From 94cb4f1ee665eba50f417e795a7f8fbe30196ada Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 29 Dec 2022 19:49:05 +0100 Subject: [PATCH 7/9] DOC update ref in user guide --- doc/modules/covariance.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst index c97676ea62108..50927f9a677f6 100644 --- a/doc/modules/covariance.rst +++ b/doc/modules/covariance.rst @@ -160,8 +160,10 @@ object to the same sample. .. topic:: References: - .. [2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation", - IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. + .. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + <0907.4698>` .. 
topic:: Examples: From 1ae4577e73311c4fddece6f3cd6c638407fa3f75 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 1 Feb 2023 17:36:23 +0100 Subject: [PATCH 8/9] revert implementation and add comment --- sklearn/covariance/_shrunk_covariance.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 31b6264f6d15e..1a3f05059e098 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -61,27 +61,35 @@ def _oas(X, *, assume_centered=False): n_samples, n_features = X.shape emp_cov = empirical_covariance(X, assume_centered=assume_centered) - trace_emp_cov = np.trace(emp_cov) # The shrinkage is defined as: # shrinkage = min( - # trace(S**2) + trace(S)**2) / ((n + 1) (trace(S**2) - trace(S)**2 / p), 1 + # (trace(S @ S.T) + trace(S)**2) / ((n + 1) (trace(S @ S.T) - trace(S)**2 / p)), 1 # ) # where n and p are n_samples and n_features, respectively (cf. Eq. 23 in [1]). # The factor 2 / p is omitted since it does not impact the value of the estimator # for large p. - trace_squared_emp_cov = trace_emp_cov**2 - trace_emp_cov_squared = np.trace(emp_cov**2) - num = trace_emp_cov_squared + trace_squared_emp_cov - den = (n_samples + 1) * (trace_emp_cov_squared - trace_squared_emp_cov / n_features) - shrinkage = 1.0 if den == 0 else max(0, min(num / den, 1.0)) + + # Instead of computing trace(S @ S.T), we can compute the average of the squared + # elements of S, which is equal to trace(S @ S.T) / p**2. 
+ # See the definition of the Frobenius norm: + # https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm + alpha = np.mean(emp_cov**2) + mu = np.trace(emp_cov) / n_features + mu_squared = mu**2 + + # The factor 1 / p**2 will cancel out since it is in both the numerator and + # denominator + num = alpha + mu_squared + den = (n_samples + 1) * (alpha - mu_squared / n_features) + shrinkage = 1.0 if den == 0 else min(num / den, 1.0) # The shrunk covariance is defined as: # (1 - shrinkage) * S + shrinkage * F (cf. Eq. 4 in [1]) # where S is the empirical covariance and F is the shrinkage target defined as # F = trace(S) / n_features * np.identity(n_features) (cf. Eq. 3 in [1]) shrunk_cov = (1.0 - shrinkage) * emp_cov - shrunk_cov.flat[:: n_features + 1] += shrinkage * trace_emp_cov / n_features + shrunk_cov.flat[:: n_features + 1] += shrinkage * mu return shrunk_cov, shrinkage From e5c0b5d8815dd6821b50aea6dba84005ca3d18b6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 1 Feb 2023 17:40:50 +0100 Subject: [PATCH 9/9] revert doc --- sklearn/covariance/_shrunk_covariance.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index dbfa8b8e2d6cc..5cdc9f3d212ad 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -706,13 +706,13 @@ class OAS(EmpiricalCovariance): ... size=500) >>> oas = OAS().fit(X) >>> oas.covariance_ - array([[0.7456..., 0.2644...], - [0.2644..., 0.4041...]]) + array([[0.7533..., 0.2763...], + [0.2763..., 0.3964...]]) >>> oas.precision_ - array([[ 1.7463..., -1.1428...], - [-1.1428..., 3.2224...]]) + array([[ 1.7833..., -1.2431... ], + [-1.2431..., 3.3889...]]) >>> oas.shrinkage_ - 0.0617... + 0.0195... """ def fit(self, X, y=None):