From a71eb91734ec694c9b4146982f840c9a30e9fb56 Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Mon, 13 Feb 2017 11:21:39 -0800 Subject: [PATCH 01/43] r_regression and abs_r_regression added --- .../feature_selection/univariate_selection.py | 77 ++++++++++++++++--- 1 file changed, 65 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index f1d6047f0b55e..bf07970ea55c6 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -227,18 +227,15 @@ def chi2(X, y): return _chisquare(observed, expected) -def f_regression(X, y, center=True): - """Univariate linear regression tests. +def r_regression(X, y, center=True): + """Univariate linear regression tests returning Pearson R. Quick linear model for testing the effect of a single regressor, sequentially for many regressors. - This is done in 2 steps: - - 1. The cross correlation between each regressor and the target is computed, + The cross correlation between each regressor and the target is computed, that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * std(y)). - 2. It is converted to an F score then to a p-value. Read more in the :ref:`User Guide `. @@ -255,14 +252,12 @@ def f_regression(X, y, center=True): Returns ------- - F : array, shape=(n_features,) - F values of features. - - pval : array, shape=(n_features,) - p-values of F-scores. + corr : array, shape=(n_features,) + Pearson R correlation coefficients of features. See also -------- + f_regression: Univariate linear regression tests returning f-statistic and p-values f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. """ @@ -288,14 +283,70 @@ def f_regression(X, y, center=True): corr = safe_sparse_dot(y, X) corr /= X_norms corr /= norm(y) + return corr - # convert to p-value +def f_regression(X, y, center=True): + """Univariate linear regression tests returning F-statistic and p-values. + + Quick linear model for testing the effect of a single regressor, + sequentially for many regressors. + + This is done in 2 steps: + + 1. The cross correlation between each regressor and the target is computed, + that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * + std(y)) using r_regression function. + 2. It is converted to an F score and then to a p-value. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + X : {array-like, sparse matrix} shape = (n_samples, n_features) + The set of regressors that will be tested sequentially. + + y : array of shape(n_samples). + The data matrix + + center : True, bool, + If true, X and y will be centered. + + Returns + ------- + F : array, shape=(n_features,) + F values of features. + + pval : array, shape=(n_features,) + p-values of F-scores. + + See also + -------- + r_regression: Univariate linear regression tests returning Pearson R. + f_classif: ANOVA F-value between label/feature for classification tasks. + chi2: Chi-squared stats of non-negative features for classification tasks. 
+ """ + + # compute the correlation + corr = r_regression(X, y, center=center) degrees_of_freedom = y.size - (2 if center else 1) + # convert to p-value F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom pv = stats.f.sf(F, 1, degrees_of_freedom) return F, pv +def abs_r_regression(X, y, center=True): + """Univariate linear regression tests returning absolute value of Pearson R. + + This convenience wrapper is to be used with SelectKBest and other models + that require a statistic which is increases with significance of association. + + see r_regression for details. + """ + # compute the correlation + corr = r_regression(X, y, center=center) + return abs(corr) + ###################################################################### # Base classes @@ -464,6 +515,8 @@ class SelectKBest(_BaseFilter): f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. chi2: Chi-squared stats of non-negative features for classification tasks. + abs_r_regression: absolute value of Pearson R between label/feature for + regression tasks. f_regression: F-value between label/feature for regression tasks. mutual_info_regression: Mutual information for a continious target. SelectPercentile: Select features based on percentile of the highest scores. From 88ade5322bb2f5752fd44de3e470776d6693e6ec Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Mon, 13 Feb 2017 11:21:39 -0800 Subject: [PATCH 02/43] r_regression and abs_r_regression added --- .../feature_selection/univariate_selection.py | 77 ++++++++++++++++--- 1 file changed, 65 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index f1d6047f0b55e..9430fbee7c687 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -227,18 +227,15 @@ def chi2(X, y): return _chisquare(observed, expected) -def f_regression(X, y, center=True): - """Univariate linear regression tests. +def r_regression(X, y, center=True): + """Univariate linear regression tests returning Pearson R. Quick linear model for testing the effect of a single regressor, sequentially for many regressors. - This is done in 2 steps: - - 1. The cross correlation between each regressor and the target is computed, + The cross correlation between each regressor and the target is computed, that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * std(y)). - 2. It is converted to an F score then to a p-value. Read more in the :ref:`User Guide `. @@ -255,14 +252,12 @@ def f_regression(X, y, center=True): Returns ------- - F : array, shape=(n_features,) - F values of features. - - pval : array, shape=(n_features,) - p-values of F-scores. + corr : array, shape=(n_features,) + Pearson R correlation coefficients of features. See also -------- + f_regression: Univariate linear regression tests returning f-statistic and p-values f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. """ @@ -288,14 +283,70 @@ def f_regression(X, y, center=True): corr = safe_sparse_dot(y, X) corr /= X_norms corr /= norm(y) + return corr - # convert to p-value +def f_regression(X, y, center=True): + """Univariate linear regression tests returning F-statistic and p-values. + + Quick linear model for testing the effect of a single regressor, + sequentially for many regressors. 
+ + This is done in 2 steps: + + 1. The cross correlation between each regressor and the target is computed, + that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * + std(y)) using r_regression function. + 2. It is converted to an F score and then to a p-value. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + X : {array-like, sparse matrix} shape = (n_samples, n_features) + The set of regressors that will be tested sequentially. + + y : array of shape(n_samples). + The data matrix + + center : True, bool, + If true, X and y will be centered. + + Returns + ------- + F : array, shape=(n_features,) + F values of features. + + pval : array, shape=(n_features,) + p-values of F-scores. + + See also + -------- + r_regression: Univariate linear regression tests returning Pearson R. + f_classif: ANOVA F-value between label/feature for classification tasks. + chi2: Chi-squared stats of non-negative features for classification tasks. + """ + + # compute the correlation + corr = r_regression(X, y, center=center) degrees_of_freedom = y.size - (2 if center else 1) + # convert to p-value F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom pv = stats.f.sf(F, 1, degrees_of_freedom) return F, pv +def abs_r_regression(X, y, center=True): + """Univariate linear regression tests returning absolute value of Pearson R. + + This convenience wrapper is to be used with SelectKBest and other models + that require a statistic which is increases with significance of association. + + see r_regression for details. + """ + # compute the correlation + corr = r_regression(X, y, center=center) + return abs(corr) + ###################################################################### # Base classes @@ -464,6 +515,8 @@ class SelectKBest(_BaseFilter): f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. chi2: Chi-squared stats of non-negative features for classification tasks. + abs_r_regression: absolute value of Pearson R between label/feature for + regression tasks. f_regression: F-value between label/feature for regression tasks. mutual_info_regression: Mutual information for a continious target. SelectPercentile: Select features based on percentile of the highest scores. From fbe3f97ee48c57631d267c1c9318033246149bb7 Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Wed, 15 Feb 2017 10:23:35 -0800 Subject: [PATCH 03/43] whitespace fix --- sklearn/feature_selection/univariate_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index 9430fbee7c687..28a270e0dc2ef 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -515,7 +515,7 @@ class SelectKBest(_BaseFilter): f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. chi2: Chi-squared stats of non-negative features for classification tasks. - abs_r_regression: absolute value of Pearson R between label/feature for + abs_r_regression: absolute value of Pearson R between label/feature for regression tasks. f_regression: F-value between label/feature for regression tasks. mutual_info_regression: Mutual information for a continious target. 
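[Series note] A quick sanity check of the scorers added in the patches above — a minimal sketch with synthetic data; the dataset parameters are illustrative and simply mirror the `make_regression` setup used in this series' tests, they are not part of any patch:

    import numpy as np

    from sklearn.datasets import make_regression
    from sklearn.feature_selection import abs_r_regression, r_regression

    # Synthetic problem with 5 informative features out of 20.
    X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                           shuffle=False, random_state=0)

    r = r_regression(X, y)          # signed Pearson R, one value per feature
    abs_r = abs_r_regression(X, y)  # |R|, a score that grows with association

    # r lies in (-1, 1); abs_r is its element-wise absolute value.
    assert np.all((r > -1) & (r < 1))
    assert np.allclose(abs_r, np.abs(r))
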
From f4f6a1abd1888910c0c917a4a44c657e1218bc18 Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Wed, 15 Feb 2017 10:46:16 -0800 Subject: [PATCH 04/43] import and tests --- sklearn/feature_selection/__init__.py | 2 + .../tests/test_feature_select.py | 48 +++++++++++++++++-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index ffa392b5b26db..038883ce5ed8e 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -8,6 +8,8 @@ from .univariate_selection import f_classif from .univariate_selection import f_oneway from .univariate_selection import f_regression +from .univariate_selection import r_regression +from .univariate_selection import abs_r_regression from .univariate_selection import SelectPercentile from .univariate_selection import SelectKBest from .univariate_selection import SelectFpr diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 6567cc3d16493..1d0e7b8d0d6dd 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -26,9 +26,10 @@ from sklearn.datasets.samples_generator import (make_classification, make_regression) from sklearn.feature_selection import ( - chi2, f_classif, f_oneway, f_regression, mutual_info_classif, - mutual_info_regression, SelectPercentile, SelectKBest, SelectFpr, - SelectFdr, SelectFwe, GenericUnivariateSelect) + chi2, f_classif, f_oneway, f_regression, abs_r_regression, + mutual_info_classif, mutual_info_regression, SelectPercentile, + SelectKBest, SelectFpr, SelectFdr, SelectFwe, + GenericUnivariateSelect) ############################################################################## @@ -79,6 +80,28 @@ def test_f_classif(): assert_array_almost_equal(pv_sparse, pv) +def test_abs_r_regression(): + # Test whether the F test yields meaningful results + # on a simple simulated regression problem + X, y = make_regression(n_samples=200, n_features=20, n_informative=5, + shuffle=False, random_state=0) + + abs_pearson_r = abs_r_regression(X, y) + assert_true((abs_pearson_r < 1).all()) + assert_true((abs_pearson_r[:5] > 0.1).all()) + assert_true((abs_pearson_r[5:] < 0.2 ).all()) + + # with centering, compare with sparse + abs_pearson_r = f_regression(X, y, center=True) + abs_pearson_r_sparse = f_regression(sparse.csr_matrix(X), y, center=True) + assert_array_almost_equal(abs_pearson_r_sparse, abs_pearson_r) + + # again without centering, compare with sparse + abs_pearson_r = f_regression(X, y, center=False) + abs_pearson_r_sparse = f_regression(sparse.csr_matrix(X), y, center=False) + assert_array_almost_equal(abs_pearson_r_sparse, abs_pearson_r) + + def test_f_regression(): # Test whether the F test yields meaningful results # on a simple simulated regression problem @@ -357,6 +380,25 @@ def test_select_kbest_regression(): assert_array_equal(support, gtruth) +def test_select_kbest_abs_r_regression(): + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the k best heuristic + X, y = make_regression(n_samples=200, n_features=20, n_informative=5, + shuffle=False, random_state=0, noise=10) + + univariate_filter = SelectKBest(abs_r_regression, k=5) + X_r = univariate_filter.fit(X, y).transform(X) + assert_best_scores_kept(univariate_filter) + X_r2 = GenericUnivariateSelect( + f_regression, mode='k_best', 
param=5).fit(X, y).transform(X) + assert_array_equal(X_r, X_r2) + support = univariate_filter.get_support() + gtruth = np.zeros(20) + gtruth[:5] = 1 + assert_array_equal(support, gtruth) + + def test_select_heuristics_regression(): # Test whether the relative univariate feature selection # gets the correct items in a simple regression problem From 0fdfa134917037a1714308b653f18dece9a011f3 Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Wed, 15 Feb 2017 11:00:41 -0800 Subject: [PATCH 05/43] indentation --- .../feature_selection/univariate_selection.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index 28a270e0dc2ef..03377e071858d 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -336,16 +336,16 @@ def f_regression(X, y, center=True): def abs_r_regression(X, y, center=True): - """Univariate linear regression tests returning absolute value of Pearson R. + """Univariate linear regression tests returning absolute value of Pearson R. - This convenience wrapper is to be used with SelectKBest and other models - that require a statistic which is increases with significance of association. + This convenience wrapper is to be used with SelectKBest and other models + that require a statistic which is increases with significance of association. - see r_regression for details. - """ - # compute the correlation - corr = r_regression(X, y, center=center) - return abs(corr) + see r_regression for details. + """ + # compute the correlation + corr = r_regression(X, y, center=center) + return abs(corr) ###################################################################### # Base classes From 3b5498fd9e79ac4701240d35db48c8a9415634b0 Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Wed, 15 Feb 2017 11:50:00 -0800 Subject: [PATCH 06/43] code style --- sklearn/feature_selection/tests/test_feature_select.py | 2 +- sklearn/feature_selection/univariate_selection.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 1d0e7b8d0d6dd..dd12aac480654 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -89,7 +89,7 @@ def test_abs_r_regression(): abs_pearson_r = abs_r_regression(X, y) assert_true((abs_pearson_r < 1).all()) assert_true((abs_pearson_r[:5] > 0.1).all()) - assert_true((abs_pearson_r[5:] < 0.2 ).all()) + assert_true((abs_pearson_r[5:] < 0.2).all()) # with centering, compare with sparse abs_pearson_r = f_regression(X, y, center=True) diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index 03377e071858d..fe4782899e6d9 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -257,7 +257,8 @@ def r_regression(X, y, center=True): See also -------- - f_regression: Univariate linear regression tests returning f-statistic and p-values + f_regression: Univariate linear regression tests returning f-statistic + and p-values f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. 
""" @@ -285,6 +286,7 @@ def r_regression(X, y, center=True): corr /= norm(y) return corr + def f_regression(X, y, center=True): """Univariate linear regression tests returning F-statistic and p-values. @@ -336,10 +338,11 @@ def f_regression(X, y, center=True): def abs_r_regression(X, y, center=True): - """Univariate linear regression tests returning absolute value of Pearson R. + """Absolute value of Pearson R from univariate linear regressions. This convenience wrapper is to be used with SelectKBest and other models - that require a statistic which is increases with significance of association. + that require a statistic which is increases with significance of + association. see r_regression for details. """ From fa5dfe3161a2e365efed0237ee8d7a82d7444191 Mon Sep 17 00:00:00 2001 From: Dmytro S Lituiev Date: Wed, 15 Feb 2017 11:52:24 -0800 Subject: [PATCH 07/43] init fix --- sklearn/feature_selection/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index 038883ce5ed8e..5584eafc831c2 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -41,5 +41,7 @@ 'f_classif', 'f_oneway', 'f_regression', + 'r_regression', + 'abs_r_regression', 'mutual_info_classif', 'mutual_info_regression'] From f6dcf523fca8dbade01d97f3e871adbe3ad08336 Mon Sep 17 00:00:00 2001 From: "Julien Jerphanion (@jjerphan)" Date: Sat, 9 May 2020 16:15:21 +0200 Subject: [PATCH 08/43] Change assert_true for assert --- sklearn/feature_selection/tests/test_feature_select.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 923ff14321770..70094e8748af3 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -79,9 +79,9 @@ def test_abs_r_regression(): shuffle=False, random_state=0) abs_pearson_r = abs_r_regression(X, y) - assert_true((abs_pearson_r < 1).all()) - assert_true((abs_pearson_r[:5] > 0.1).all()) - assert_true((abs_pearson_r[5:] < 0.2).all()) + assert ((abs_pearson_r < 1).all()) + assert ((abs_pearson_r[:5] > 0.1).all()) + assert ((abs_pearson_r[5:] < 0.2).all()) # with centering, compare with sparse abs_pearson_r = f_regression(X, y, center=True) From 63d6fd759bcf3acadbc5cb1d6a144523a7d60e09 Mon Sep 17 00:00:00 2001 From: "Julien Jerphanion (@jjerphan)" Date: Sat, 9 May 2020 16:19:05 +0200 Subject: [PATCH 09/43] Change module for import to match current one I made this change to have the test ran as I previously got: ``` sklearn/feature_selection/tests/test_feature_select.py:None (sklearn/feature_selection/tests/test_feature_select.py) /home/jsquared/.virtualenvs/sk/lib64/python3.8/site-packages/py/_path/local.py:701: in pyimport __import__(modname) ../__init__.py:27: in from ._mutual_info import mutual_info_regression, mutual_info_classif ../_mutual_info.py:9: in from ..neighbors import NearestNeighbors ../../neighbors/__init__.py:17: in from ._nca import NeighborhoodComponentsAnalysis ../../neighbors/_nca.py:22: in from ..decomposition import PCA ../../decomposition/__init__.py:17: in from .dict_learning import dict_learning E ModuleNotFoundError: No module named 'sklearn.decomposition.dict_learning' ``` It seems that there are reason for this special handling to exist according to the comment above. This might need to be reverted. 
--- sklearn/decomposition/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index 42f661171eafe..cd4ec5b3c8668 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -14,7 +14,7 @@ import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - from .dict_learning import dict_learning + from ._dict_learning import dict_learning from ._nmf import NMF, non_negative_factorization # noqa From 1c9d5125be1d9eff482f12c02c363798fda933b8 Mon Sep 17 00:00:00 2001 From: "Julien Jerphanion (@jjerphan)" Date: Sat, 9 May 2020 16:35:02 +0200 Subject: [PATCH 10/43] Add reference in doc comment --- sklearn/feature_selection/_univariate_selection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index c2290880f712a..762c0f9c948f5 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -356,7 +356,9 @@ def abs_r_regression(X, y, center=True): that require a statistic which is increases with significance of association. - see r_regression for details. + See also + -------- + r_regression: Univariate linear regression tests returning Pearson R. """ # compute the correlation corr = r_regression(X, y, center=center) From 45514b2a79a53ec78d32047e09caa56477b22378 Mon Sep 17 00:00:00 2001 From: "Julien Jerphanion (@jjerphan)" Date: Sat, 9 May 2020 17:07:15 +0200 Subject: [PATCH 11/43] Revert "Change module for import to match current one" This reverts commit 63d6fd759bcf3acadbc5cb1d6a144523a7d60e09. --- sklearn/decomposition/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index cd4ec5b3c8668..42f661171eafe 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -14,7 +14,7 @@ import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - from ._dict_learning import dict_learning + from .dict_learning import dict_learning from ._nmf import NMF, non_negative_factorization # noqa From 6c120e3cec437e3962088008a374b56388393da7 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 9 Feb 2021 18:37:54 +0100 Subject: [PATCH 12/43] Merge branch 'main' into r_regression --- .../_univariate_selection.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 4259c24c14777..f4339efd1fc81 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -141,7 +141,7 @@ def f_classif(X, y): pval : array, shape = [n_features,] The set of p-values. - See Also + See also -------- chi2 : Chi-squared stats of non-negative features for classification tasks. f_regression : F-value between label/feature for regression tasks. @@ -204,7 +204,7 @@ def chi2(X, y): ----- Complexity of this algorithm is O(n_classes * n_features). - See Also + See also -------- f_classif : ANOVA F-value between label/feature for classification tasks. f_regression : F-value between label/feature for regression tasks. 
@@ -259,7 +259,7 @@ def r_regression(X, y, *, center=True): corr : array, shape=(n_features,) Pearson R correlation coefficients of features. - See Also + See also -------- f_regression: Univariate linear regression tests returning f-statistic and p-values @@ -438,7 +438,7 @@ class SelectPercentile(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. - Default is f_classif (see below "See Also"). The default function only + Default is f_classif (see below "See also"). The default function only works with classification tasks. .. versionadded:: 0.18 @@ -470,7 +470,7 @@ class SelectPercentile(_BaseFilter): Ties between features with equal scores will be broken in an unspecified way. - See Also + See also -------- f_classif : ANOVA F-value between label/feature for classification tasks. mutual_info_classif : Mutual information for a discrete target. @@ -524,7 +524,7 @@ class SelectKBest(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. - Default is f_classif (see below "See Also"). The default function only + Default is f_classif (see below "See also"). The default function only works with classification tasks. .. versionadded:: 0.18 @@ -557,7 +557,7 @@ class SelectKBest(_BaseFilter): Ties between features with equal scores will be broken in an unspecified way. - See Also + See also -------- f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. @@ -615,7 +615,7 @@ class SelectFpr(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). - Default is f_classif (see below "See Also"). The default function only + Default is f_classif (see below "See also"). The default function only works with classification tasks. alpha : float, default=5e-2 @@ -640,7 +640,7 @@ class SelectFpr(_BaseFilter): >>> X_new.shape (569, 16) - See Also + See also -------- f_classif : ANOVA F-value between label/feature for classification tasks. chi2 : Chi-squared stats of non-negative features for classification tasks. @@ -679,7 +679,7 @@ class SelectFdr(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). - Default is f_classif (see below "See Also"). The default function only + Default is f_classif (see below "See also"). The default function only works with classification tasks. alpha : float, default=5e-2 @@ -708,7 +708,7 @@ class SelectFdr(_BaseFilter): ---------- https://en.wikipedia.org/wiki/False_discovery_rate - See Also + See also -------- f_classif : ANOVA F-value between label/feature for classification tasks. mutual_info_classif : Mutual information for a discrete target. @@ -750,7 +750,7 @@ class SelectFwe(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). - Default is f_classif (see below "See Also"). The default function only + Default is f_classif (see below "See also"). The default function only works with classification tasks. alpha : float, default=5e-2 @@ -775,7 +775,7 @@ class SelectFwe(_BaseFilter): pvalues_ : array-like of shape (n_features,) p-values of feature scores. 
- See Also + See also -------- f_classif : ANOVA F-value between label/feature for classification tasks. chi2 : Chi-squared stats of non-negative features for classification tasks. @@ -843,7 +843,7 @@ class GenericUnivariateSelect(_BaseFilter): >>> X_new.shape (569, 20) - See Also + See also -------- f_classif : ANOVA F-value between label/feature for classification tasks. mutual_info_classif : Mutual information for a discrete target. From 98383aa22a413c3e1a8d168149aa38af54223123 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 15 Feb 2021 16:39:53 +0100 Subject: [PATCH 13/43] Add documentation reference for abs_r_regression and r_regression --- doc/modules/classes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 65d555f978df0..33126a7ed259a 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -557,9 +557,11 @@ From text :toctree: generated/ :template: function.rst + feature_selection.abs_r_regression feature_selection.chi2 feature_selection.f_classif feature_selection.f_regression + feature_selection.r_regression feature_selection.mutual_info_classif feature_selection.mutual_info_regression From 17838def33d17084da41b6484946ff4d8f7fbe0a Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 15 Feb 2021 17:01:08 +0100 Subject: [PATCH 14/43] Complete test to include r_regression --- .../tests/test_feature_select.py | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 2ec73cff20b7a..120e8aec0bc1e 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -18,7 +18,7 @@ from sklearn.datasets import make_classification, make_regression from sklearn.feature_selection import ( - chi2, f_classif, f_oneway, f_regression, abs_r_regression, + chi2, f_classif, f_oneway, f_regression, abs_r_regression, r_regression, mutual_info_classif, mutual_info_regression, SelectPercentile, SelectKBest, SelectFpr, SelectFdr, SelectFwe, GenericUnivariateSelect) @@ -72,26 +72,23 @@ def test_f_classif(): assert_array_almost_equal(pv_sparse, pv) -def test_abs_r_regression(): - # Test whether the F test yields meaningful results - # on a simple simulated regression problem +@pytest.mark.parametrize("coeff", [abs_r_regression, r_regression]) +def test_r_regression(coeff): X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) - abs_pearson_r = abs_r_regression(X, y) - assert ((abs_pearson_r < 1).all()) - assert ((abs_pearson_r[:5] > 0.1).all()) - assert ((abs_pearson_r[5:] < 0.2).all()) + pearson_r = coeff(X, y) + assert ((pearson_r < 1).all()) # with centering, compare with sparse - abs_pearson_r = f_regression(X, y, center=True) - abs_pearson_r_sparse = f_regression(sparse.csr_matrix(X), y, center=True) - assert_array_almost_equal(abs_pearson_r_sparse, abs_pearson_r) + pearson_r = coeff(X, y, center=True) + pearson_r_sparse = coeff(sparse.csr_matrix(X), y, center=True) + assert_array_almost_equal(pearson_r_sparse, pearson_r) # again without centering, compare with sparse - abs_pearson_r = f_regression(X, y, center=False) - abs_pearson_r_sparse = f_regression(sparse.csr_matrix(X), y, center=False) - assert_array_almost_equal(abs_pearson_r_sparse, abs_pearson_r) + pearson_r = coeff(X, y, center=False) + pearson_r_sparse = coeff(sparse.csr_matrix(X), y, 
center=False) + assert_array_almost_equal(pearson_r_sparse, pearson_r) def test_f_regression(): From e1a43e8468a45125b202d72b5e8033aa2da302aa Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 16 Feb 2021 08:38:21 +0100 Subject: [PATCH 15/43] Remove Sphinx warning on indentation Co-authored-by: Chiara Marmo --- sklearn/feature_selection/_univariate_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index f4339efd1fc81..66b0c368d7437 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -239,7 +239,7 @@ def r_regression(X, y, *, center=True): The cross correlation between each regressor and the target is computed, that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * - std(y)). + std(y)). For more on usage see the :ref:`User Guide `. From e5aa83144af3fd5014c545bd50a5c2d6f7ac634f Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 16 Feb 2021 15:10:36 +0100 Subject: [PATCH 16/43] Use 'See Also' over 'See Also' See Guidelines for writing documentation: https://scikit-learn.org/stable/developers/contributing.html#guidelines-for-writing-documentation Co-authored-by: Olivier Grisel --- .../_univariate_selection.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 66b0c368d7437..1e5ac53868303 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -141,7 +141,7 @@ def f_classif(X, y): pval : array, shape = [n_features,] The set of p-values. - See also + See Also -------- chi2 : Chi-squared stats of non-negative features for classification tasks. f_regression : F-value between label/feature for regression tasks. @@ -204,7 +204,7 @@ def chi2(X, y): ----- Complexity of this algorithm is O(n_classes * n_features). - See also + See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. f_regression : F-value between label/feature for regression tasks. @@ -259,7 +259,7 @@ def r_regression(X, y, *, center=True): corr : array, shape=(n_features,) Pearson R correlation coefficients of features. - See also + See Also -------- f_regression: Univariate linear regression tests returning f-statistic and p-values @@ -334,7 +334,7 @@ def f_regression(X, y, *, center=True): pval : array, shape=(n_features,) p-values of F-scores. - See also + See Also -------- r_regression: Univariate linear regression tests returning Pearson R. f_classif: ANOVA F-value between label/feature for classification tasks. @@ -357,7 +357,7 @@ def abs_r_regression(X, y, center=True): that require a statistic which is increases with significance of association. - See also + See Also -------- r_regression: Univariate linear regression tests returning Pearson R. """ @@ -438,7 +438,7 @@ class SelectPercentile(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. - Default is f_classif (see below "See also"). The default function only + Default is f_classif (see below "See Also"). The default function only works with classification tasks. .. 
versionadded:: 0.18 @@ -470,7 +470,7 @@ class SelectPercentile(_BaseFilter): Ties between features with equal scores will be broken in an unspecified way. - See also + See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. mutual_info_classif : Mutual information for a discrete target. @@ -524,7 +524,7 @@ class SelectKBest(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. - Default is f_classif (see below "See also"). The default function only + Default is f_classif (see below "See Also"). The default function only works with classification tasks. .. versionadded:: 0.18 @@ -557,7 +557,7 @@ class SelectKBest(_BaseFilter): Ties between features with equal scores will be broken in an unspecified way. - See also + See Also -------- f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. @@ -615,7 +615,7 @@ class SelectFpr(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). - Default is f_classif (see below "See also"). The default function only + Default is f_classif (see below "See Also"). The default function only works with classification tasks. alpha : float, default=5e-2 @@ -640,7 +640,7 @@ class SelectFpr(_BaseFilter): >>> X_new.shape (569, 16) - See also + See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. chi2 : Chi-squared stats of non-negative features for classification tasks. @@ -679,7 +679,7 @@ class SelectFdr(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). - Default is f_classif (see below "See also"). The default function only + Default is f_classif (see below "See Also"). The default function only works with classification tasks. alpha : float, default=5e-2 @@ -708,7 +708,7 @@ class SelectFdr(_BaseFilter): ---------- https://en.wikipedia.org/wiki/False_discovery_rate - See also + See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. mutual_info_classif : Mutual information for a discrete target. @@ -750,7 +750,7 @@ class SelectFwe(_BaseFilter): score_func : callable, default=f_classif Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). - Default is f_classif (see below "See also"). The default function only + Default is f_classif (see below "See Also"). The default function only works with classification tasks. alpha : float, default=5e-2 @@ -775,7 +775,7 @@ class SelectFwe(_BaseFilter): pvalues_ : array-like of shape (n_features,) p-values of feature scores. - See also + See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. chi2 : Chi-squared stats of non-negative features for classification tasks. @@ -843,7 +843,7 @@ class GenericUnivariateSelect(_BaseFilter): >>> X_new.shape (569, 20) - See also + See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. mutual_info_classif : Mutual information for a discrete target. 
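[Series note] The two-step computation that f_regression's docstring describes — Pearson R first, then conversion to an F score and a p-value — can be reproduced by hand. A sketch under the assumption that scipy and the helpers from this branch are importable (data and sizes are illustrative):

    import numpy as np
    from scipy import stats

    from sklearn.datasets import make_regression
    from sklearn.feature_selection import f_regression, r_regression

    X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                           shuffle=False, random_state=0)

    F, pv = f_regression(X, y, center=True)

    # Step 1: Pearson R per feature; step 2: convert R to an F score, then
    # the F score to a p-value with 1 and (n_samples - 2) degrees of freedom.
    r = r_regression(X, y, center=True)
    dof = y.size - 2
    F_manual = r ** 2 / (1 - r ** 2) * dof
    pv_manual = stats.f.sf(F_manual, 1, dof)

    assert np.allclose(F, F_manual)
    assert np.allclose(pv, pv_manual)
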
From bcaae35b4fa71bbd048c81db9eff2633cc7ee62e Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 16 Feb 2021 15:40:09 +0100 Subject: [PATCH 17/43] Clarify docstrings --- .../_univariate_selection.py | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 1e5ac53868303..8bd379b15988f 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -231,7 +231,7 @@ def chi2(X, y): @_deprecate_positional_args def r_regression(X, y, *, center=True): - """Univariate linear regression tests returning Pearson R. + """Compute Pearson R correlation coefficients of features. Linear model for testing the individual effect of each of many regressors. This is a scoring function to be used in a feature selection procedure, not @@ -261,17 +261,13 @@ def r_regression(X, y, *, center=True): See Also -------- + abs_r_regression: Absolute value of Pearson R between label and features + for regression tasks. f_regression: Univariate linear regression tests returning f-statistic and p-values mutual_info_regression: Mutual information for a continuous target. f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. - SelectKBest: Select features based on the k highest scores. - SelectFpr: Select features based on a false positive rate test. - SelectFdr: Select features based on an estimated false discovery rate. - SelectFwe: Select features based on family-wise error rate. - SelectPercentile: Select features based on percentile of the highest - scores. """ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float64) @@ -313,6 +309,14 @@ def f_regression(X, y, *, center=True): std(y)) using r_regression function. 2. It is converted to an F score and then to a p-value. + If p-values are not needed, r_regression can be used to rank features as + a slightly cheaper alternative to f_regression. Note however that + contrary to f_regression, r_regression values lie in [-1, 1] and can thus + be negative. + + Alternatively, `abs_r_regression` can be used to rank features by + correlation magnitude instead. + Read more in the :ref:`User Guide `. Parameters @@ -336,9 +340,17 @@ def f_regression(X, y, *, center=True): See Also -------- - r_regression: Univariate linear regression tests returning Pearson R. + abs_r_regression: Absolute value of Pearson R between label and features + for regression tasks. + r_regression: Pearson R between label/feature for regression tasks. f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. + SelectKBest: Select features based on the k highest scores. + SelectFpr: Select features based on a false positive rate test. + SelectFdr: Select features based on an estimated false discovery rate. + SelectFwe: Select features based on family-wise error rate. + SelectPercentile: Select features based on percentile of the highest + scores. """ # compute the correlation @@ -360,6 +372,7 @@ def abs_r_regression(X, y, center=True): See Also -------- r_regression: Univariate linear regression tests returning Pearson R. + SelectKBest: Select features based on the k highest scores. 
""" # compute the correlation corr = r_regression(X, y, center=center) @@ -562,8 +575,8 @@ class SelectKBest(_BaseFilter): f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. chi2: Chi-squared stats of non-negative features for classification tasks. - abs_r_regression: absolute value of Pearson R between label/feature for - regression tasks. + abs_r_regression: Absolute value of Pearson R between label and features + for regression tasks. f_regression: F-value between label/feature for regression tasks. mutual_info_regression: Mutual information for a continuous target. SelectPercentile: Select features based on percentile of the highest From 8a803ea11a9a2cad53ce0d5e8250b97194a81f73 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 16 Feb 2021 15:48:57 +0100 Subject: [PATCH 18/43] Add consistency test --- .../tests/test_feature_select.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 120e8aec0bc1e..b0d71a3ba3f36 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -4,6 +4,7 @@ import itertools import warnings import numpy as np +from numpy.testing import assert_allclose from scipy import stats, sparse import pytest @@ -117,6 +118,20 @@ def test_f_regression(): assert_array_almost_equal(pv_sparse, pv) +def test_f_regression_r_regression_consistency(): + # Test the equivalence of f_regression and abs_r_regression for variable + # selection using the returned values ordering + X, y = make_regression(n_samples=200, n_features=1000, + shuffle=False, random_state=0) + + Fs, _ = f_regression(X, y) + + assert_array_equal(Fs.argsort(), abs_r_regression(X, y).argsort()) + + # Test consistency of definition + assert_allclose(abs_r_regression(X, y), np.abs(r_regression(X, y))) + + def test_f_regression_input_dtype(): # Test whether f_regression returns the same value # for any numeric data_type From 47b2ea904818301784bb96f6408795b943f6f860 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 16 Feb 2021 15:52:09 +0100 Subject: [PATCH 19/43] Test for Pearson R correct support Also use a better wording. 
--- .../tests/test_feature_select.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index b0d71a3ba3f36..cbadd80809caf 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -78,18 +78,20 @@ def test_r_regression(coeff): X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) - pearson_r = coeff(X, y) - assert ((pearson_r < 1).all()) + correlation_coeffs = coeff(X, y) + if coeff == r_regression: + assert ((correlation_coeffs > -1).all()) + assert ((correlation_coeffs < 1).all()) # with centering, compare with sparse - pearson_r = coeff(X, y, center=True) - pearson_r_sparse = coeff(sparse.csr_matrix(X), y, center=True) - assert_array_almost_equal(pearson_r_sparse, pearson_r) + correlation_coeffs = coeff(X, y, center=True) + correlation_coeffs_sparse = coeff(sparse.csr_matrix(X), y, center=True) + assert_array_almost_equal(correlation_coeffs_sparse, correlation_coeffs) # again without centering, compare with sparse - pearson_r = coeff(X, y, center=False) - pearson_r_sparse = coeff(sparse.csr_matrix(X), y, center=False) - assert_array_almost_equal(pearson_r_sparse, pearson_r) + correlation_coeffs = coeff(X, y, center=False) + correlation_coeffs_sparse = coeff(sparse.csr_matrix(X), y, center=False) + assert_array_almost_equal(correlation_coeffs_sparse, correlation_coeffs) def test_f_regression(): From da964772dff07e90e8501b1825400b4e511616c3 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 16 Feb 2021 16:28:28 +0100 Subject: [PATCH 20/43] fixup! Clarify docstrings --- sklearn/feature_selection/_univariate_selection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 8bd379b15988f..32bdd085a8935 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -231,7 +231,8 @@ def chi2(X, y): @_deprecate_positional_args def r_regression(X, y, *, center=True): - """Compute Pearson R correlation coefficients of features. + """Compute Pearson R correlation coefficients between the features and + the target Linear model for testing the individual effect of each of many regressors. This is a scoring function to be used in a feature selection procedure, not @@ -366,7 +367,7 @@ def abs_r_regression(X, y, center=True): """Absolute value of Pearson R from univariate linear regressions. This convenience wrapper is to be used with SelectKBest and other models - that require a statistic which is increases with significance of + that require a statistic which is increased with significance of association. See Also From 4a548b5227c3840281f733844c6b056cb4af251a Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 12 Mar 2021 21:56:39 +0100 Subject: [PATCH 21/43] Add docstring directives for additions in version 1.0 Co-authored-by: Chiara Marmo --- doc/whats_new/v1.0.rst | 11 +++++++++++ sklearn/feature_selection/_univariate_selection.py | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index a566d03ae1bbc..3732b5a6d8412 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -103,6 +103,17 @@ Changelog input strings would result in negative indices in the transformed data. 
:pr:`19035` by :user:`Liu Yu `. +:mod:`sklearn.feature_selection` +................................ + +- |Feature| :func:`feature_selection.abs_r_regression` is new criterion + which can be used with :class:`feature_selection.SelectKBest` to select + variables. It is the absolution values of + :func:`feature_selection.r_regression` which computes Pearson R correlation + coefficients between the features and the target. + :pr:`17169` by `Dmytro Lituiev ` + and `Julien Jerphanion `. + :mod:`sklearn.inspection` ......................... diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 32bdd085a8935..a78fe309dce89 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -234,6 +234,8 @@ def r_regression(X, y, *, center=True): """Compute Pearson R correlation coefficients between the features and the target + .. versionadded:: 1.0 + Linear model for testing the individual effect of each of many regressors. This is a scoring function to be used in a feature selection procedure, not a free standing feature selection procedure. @@ -366,6 +368,8 @@ def f_regression(X, y, *, center=True): def abs_r_regression(X, y, center=True): """Absolute value of Pearson R from univariate linear regressions. + .. versionadded:: 1.0 + This convenience wrapper is to be used with SelectKBest and other models that require a statistic which is increased with significance of association. From 711544c24e568403f840042d41fd1a3ff47f2b00 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:05:30 +0200 Subject: [PATCH 22/43] Fix typo in the whats_new entry Co-authored-by: Guillaume Lemaitre --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 3732b5a6d8412..d146df8dff303 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -106,7 +106,7 @@ Changelog :mod:`sklearn.feature_selection` ................................ -- |Feature| :func:`feature_selection.abs_r_regression` is new criterion +- |Feature| :func:`feature_selection.abs_r_regression` is a new criterion which can be used with :class:`feature_selection.SelectKBest` to select variables. It is the absolution values of :func:`feature_selection.r_regression` which computes Pearson R correlation From f7fa09d407877841ce9a35b9cf77c1e4f37738d7 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:08:19 +0200 Subject: [PATCH 23/43] End sentence with a full stop. Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index a78fe309dce89..6e855f755dee7 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -232,7 +232,7 @@ def chi2(X, y): @_deprecate_positional_args def r_regression(X, y, *, center=True): """Compute Pearson R correlation coefficients between the features and - the target + the target. .. versionadded:: 1.0 From cfe2299903c186dde0d305f5b043448ac5dba30b Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:08:43 +0200 Subject: [PATCH 24/43] Correct typo in docstring. 
Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 6e855f755dee7..825f2b9b20732 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -231,7 +231,7 @@ def chi2(X, y): @_deprecate_positional_args def r_regression(X, y, *, center=True): - """Compute Pearson R correlation coefficients between the features and + """Compute Pearson's R correlation coefficients between the features and the target. .. versionadded:: 1.0 From 92c389ddb44741f764ab6bf91050534649694d38 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:09:27 +0200 Subject: [PATCH 25/43] Correct typo in the whats_new entry. Co-authored-by: Guillaume Lemaitre --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index d146df8dff303..b0e8ca63e78f8 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -109,7 +109,7 @@ Changelog - |Feature| :func:`feature_selection.abs_r_regression` is a new criterion which can be used with :class:`feature_selection.SelectKBest` to select variables. It is the absolution values of - :func:`feature_selection.r_regression` which computes Pearson R correlation + :func:`feature_selection.r_regression` which computes Pearson's R correlation coefficients between the features and the target. :pr:`17169` by `Dmytro Lituiev ` and `Julien Jerphanion `. From 29bf1d06a34ccd6bf797b2dab13f49b7b99e1c48 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:11:10 +0200 Subject: [PATCH 26/43] Fix syntax in r_regression's docstring. Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 825f2b9b20732..0c55ea1adcb56 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -240,9 +240,8 @@ def r_regression(X, y, *, center=True): This is a scoring function to be used in a feature selection procedure, not a free standing feature selection procedure. - The cross correlation between each regressor and the target is computed, - that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * - std(y)). + The cross correlation between each regressor and the target is computed + as ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * std(y)). For more on usage see the :ref:`User Guide `. 
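[Series note] The cross-correlation formula as reworded in the patch above can be checked directly against r_regression. A minimal sketch (synthetic data; nothing here is part of the patch itself):

    import numpy as np

    from sklearn.datasets import make_regression
    from sklearn.feature_selection import r_regression

    X, y = make_regression(n_samples=50, n_features=3, random_state=0)

    # The docstring's definition, written out directly: center X and y,
    # then divide the cross product by the product of the norms (the 1/n
    # factors inside the standard deviations cancel).
    Xc = X - X.mean(axis=0)
    yc = y - y.mean()
    manual = (Xc * yc[:, None]).sum(axis=0) / (
        np.linalg.norm(Xc, axis=0) * np.linalg.norm(yc))

    assert np.allclose(r_regression(X, y, center=True), manual)
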
From bbf4179631d34147e2263d2f949db641a945412a Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:21:59 +0200 Subject: [PATCH 27/43] Sort imports alphabetically Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index a72719e7a6878..eae46cf21006d 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -42,12 +42,12 @@ 'SelectFromModel', 'SelectPercentile', 'VarianceThreshold', + 'abs_r_regression', 'chi2', 'f_classif', 'f_oneway', 'f_regression', 'r_regression', - 'abs_r_regression', 'mutual_info_classif', 'mutual_info_regression', 'SelectorMixin'] From af31e7244d137e63e316a43bce9a4adbfec69a92 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:22:58 +0200 Subject: [PATCH 28/43] Remove useless decorator Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 0c55ea1adcb56..6a976555e8d5f 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -229,7 +229,6 @@ def chi2(X, y): return _chisquare(observed, expected) -@_deprecate_positional_args def r_regression(X, y, *, center=True): """Compute Pearson's R correlation coefficients between the features and the target. From 69e638ce2757d05f33e4251e3878229818675d77 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:25:23 +0200 Subject: [PATCH 29/43] Use verbose name for the correlation coefficient Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 6a976555e8d5f..968169b024a8d 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -257,7 +257,7 @@ def r_regression(X, y, *, center=True): Returns ------- - corr : array, shape=(n_features,) + correlation_coefficient : ndarray of shape (n_features,) Pearson R correlation coefficients of features. See Also @@ -290,10 +290,10 @@ def r_regression(X, y, *, center=True): X_norms = row_norms(X.T) # compute the correlation - corr = safe_sparse_dot(y, X) - corr /= X_norms - corr /= np.linalg.norm(y) - return corr + correlation_coefficient = safe_sparse_dot(y, X) + correlation_coefficient /= X_norms + correlation_coefficient /= np.linalg.norm(y) + return correlation_coefficient @_deprecate_positional_args From f5aa51354eb8699fe42df5c001baae4ecd10319c Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:30:56 +0200 Subject: [PATCH 30/43] Improve r_regression and r_regression docstring Use accurate descriptions for the parameters. 
Co-authored-by: Guillaume Lemaitre --- .../_univariate_selection.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 968169b024a8d..f0d0b9b145fea 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -246,14 +246,15 @@ def r_regression(X, y, *, center=True): Parameters ---------- - X : {array-like, sparse matrix} shape = (n_samples, n_features) - The set of regressors that will be tested sequentially. + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The data matrix. - y : array of shape(n_samples). - The data matrix + y : array-like of shape (n_samples,) + The target vector. center : bool, default=True - If true, X and y will be centered. + Whether or not to center the data matrix `X` and the target vector `y`. + By default, `X` and `y` will be centered. Returns ------- @@ -322,14 +323,15 @@ def f_regression(X, y, *, center=True): Parameters ---------- - X : {array-like, sparse matrix} shape = (n_samples, n_features) - The set of regressors that will be tested sequentially. + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The data matrix. - y : array of shape(n_samples). - The data matrix + y : array-like of shape (n_samples,) + The target vector. - center : True, bool, - If true, X and y will be centered. + center : bool, default=True + Whether or not to center the data matrix `X` and the target vector `y`. + By default, `X` and `y` will be centered. Returns ------- From dcd94633e75196ed26a17fa859bb5ff7a8783d30 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:36:02 +0200 Subject: [PATCH 31/43] Improve code comments Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index f0d0b9b145fea..70c425be3080e 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -275,8 +275,8 @@ def r_regression(X, y, *, center=True): dtype=np.float64) n_samples = X.shape[0] - # compute centered values - # note that E[(x - mean(x))*(y - mean(y))] = E[x*(y - mean(y))], so we + # Compute centered values + # Note that E[(x - mean(x))*(y - mean(y))] = E[x*(y - mean(y))], so we # need not center X if center: y = y - np.mean(y) @@ -284,13 +284,12 @@ def r_regression(X, y, *, center=True): X_means = X.mean(axis=0).getA1() else: X_means = X.mean(axis=0) - # compute the scaled standard deviations via moments + # Compute the scaled standard deviations via moments X_norms = np.sqrt(row_norms(X.T, squared=True) - n_samples * X_means ** 2) else: X_norms = row_norms(X.T) - # compute the correlation correlation_coefficient = safe_sparse_dot(y, X) correlation_coefficient /= X_norms correlation_coefficient /= np.linalg.norm(y) @@ -355,11 +354,9 @@ def f_regression(X, y, *, center=True): SelectPercentile: Select features based on percentile of the highest scores. 
""" - - # compute the correlation corr = r_regression(X, y, center=center) degrees_of_freedom = y.size - (2 if center else 1) - # convert to p-value + # Compute the test's statistics and its p-values F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom pv = stats.f.sf(F, 1, degrees_of_freedom) return F, pv @@ -379,7 +376,6 @@ def abs_r_regression(X, y, center=True): r_regression: Univariate linear regression tests returning Pearson R. SelectKBest: Select features based on the k highest scores. """ - # compute the correlation corr = r_regression(X, y, center=center) return abs(corr) From a16df67950731147c1d0ded8e45eedfc740697df Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:39:27 +0200 Subject: [PATCH 32/43] Add Sphinx domains in f_regression's docstring Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/_univariate_selection.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 70c425be3080e..dff2f86c07a9c 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -310,12 +310,12 @@ def f_regression(X, y, *, center=True): std(y)) using r_regression function. 2. It is converted to an F score and then to a p-value. - If p-values are not needed, r_regression can be used to rank features as - a slightly cheaper alternative to f_regression. Note however that - contrary to f_regression, r_regression values lie in [-1, 1] and can thus - be negative. + If p-values are not needed, :func:`r_regression` can be used to rank + features as a slightly cheaper alternative to :func:`f_regression`. + Note however that contrary to :func:`f_regression`, :func:`r_regression` + values lie in [-1, 1] and can thus be negative. - Alternatively, `abs_r_regression` can be used to rank features by + Alternatively, :func:`abs_r_regression` can be used to rank features by correlation magnitude instead. Read more in the :ref:`User Guide `. From 62b76ad16ca31147c8a6736744a5b688c56b2567 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:47:42 +0200 Subject: [PATCH 33/43] Make f statistics' and their p-values' computations clearer Co-authored-by: Guillaume Lemaitre --- .../_univariate_selection.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index dff2f86c07a9c..6ecd488cb4b13 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -334,11 +334,11 @@ def f_regression(X, y, *, center=True): Returns ------- - F : array, shape=(n_features,) - F values of features. + f_statistic : ndarray of shape (n_features,) + F-statistic for each feature. - pval : array, shape=(n_features,) - p-values of F-scores. + p_values : ndarray of shape (n_features,) + P-values associated with the F-statistic. See Also -------- @@ -354,12 +354,13 @@ def f_regression(X, y, *, center=True): SelectPercentile: Select features based on percentile of the highest scores. 
""" - corr = r_regression(X, y, center=center) - degrees_of_freedom = y.size - (2 if center else 1) - # Compute the test's statistics and its p-values - F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom - pv = stats.f.sf(F, 1, degrees_of_freedom) - return F, pv + correlation_coefficient = r_regression(X, y, center=center) + deg_of_freedom = y.size - (2 if center else 1) + + corr_coef_squared = correlation_coefficient ** 2 + f_statistic = corr_coef_squared / (1 - corr_coef_squared) * deg_of_freedom + p_values = stats.f.sf(f_statistic, 1, deg_of_freedom) + return f_statistic, p_values def abs_r_regression(X, y, center=True): From 107534e13b57754bce52dce926ba76a3f27796bd Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 08:51:37 +0200 Subject: [PATCH 34/43] Improve wording abs_r_regression Co-authored-by: Guillaume Lemaitre --- .../_univariate_selection.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 6ecd488cb4b13..d554ff5300ab2 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -263,8 +263,8 @@ def r_regression(X, y, *, center=True): See Also -------- - abs_r_regression: Absolute value of Pearson R between label and features - for regression tasks. + abs_r_regression: Absolute value of Pearson's R correlation coefficients + between label and features for regression tasks. f_regression: Univariate linear regression tests returning f-statistic and p-values mutual_info_regression: Mutual information for a continuous target. @@ -342,8 +342,8 @@ def f_regression(X, y, *, center=True): See Also -------- - abs_r_regression: Absolute value of Pearson R between label and features - for regression tasks. + abs_r_regression: Absolute value of Pearson's R correlation coefficients + between label and features for regression tasks. r_regression: Pearson R between label/feature for regression tasks. f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. @@ -364,21 +364,20 @@ def f_regression(X, y, *, center=True): def abs_r_regression(X, y, center=True): - """Absolute value of Pearson R from univariate linear regressions. + """Absolute value of Pearson's R from univariate linear regressions. .. versionadded:: 1.0 - This convenience wrapper is to be used with SelectKBest and other models - that require a statistic which is increased with significance of - association. + This convenience wrapper is to be used with + :class:`~sklearn.feature_selection.SelectKBest`. See Also -------- - r_regression: Univariate linear regression tests returning Pearson R. + r_regression: Univariate linear regression tests returning Pearson's R + correlation coefficient. SelectKBest: Select features based on the k highest scores. """ - corr = r_regression(X, y, center=center) - return abs(corr) + return np.abs(r_regression(X, y, center=center)) ###################################################################### # Base classes @@ -577,7 +576,7 @@ class SelectKBest(_BaseFilter): f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. chi2: Chi-squared stats of non-negative features for classification tasks. 
- abs_r_regression: Absolute value of Pearson R between label and features + abs_r_regression: Absolute value of Pearson's R between label and features for regression tasks. f_regression: F-value between label/feature for regression tasks. mutual_info_regression: Mutual information for a continuous target. From ce64e78669238eec2562e2c6d3e5fbfcd3d7ca30 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 09:02:16 +0200 Subject: [PATCH 35/43] Use black formatting style for imports Co-authored-by: Guillaume Lemaitre --- .../tests/test_feature_select.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index cbadd80809caf..23a5920a92b3d 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -19,10 +19,21 @@ from sklearn.datasets import make_classification, make_regression from sklearn.feature_selection import ( - chi2, f_classif, f_oneway, f_regression, abs_r_regression, r_regression, - mutual_info_classif, mutual_info_regression, SelectPercentile, - SelectKBest, SelectFpr, SelectFdr, SelectFwe, - GenericUnivariateSelect) + abs_r_regression, + chi2, + f_classif, + f_oneway, + f_regression, + GenericUnivariateSelect, + mutual_info_classif, + mutual_info_regression, + r_regression, + SelectPercentile, + SelectKBest, + SelectFpr, + SelectFdr, + SelectFwe, +) ############################################################################## From 537de9668a736fb835061a952bf8ea61b9b44eaf Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 09:03:21 +0200 Subject: [PATCH 36/43] Remove useless comments Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/tests/test_feature_select.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 23a5920a92b3d..42627376dc2f1 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -141,7 +141,6 @@ def test_f_regression_r_regression_consistency(): assert_array_equal(Fs.argsort(), abs_r_regression(X, y).argsort()) - # Test consistency of definition assert_allclose(abs_r_regression(X, y), np.abs(r_regression(X, y))) From df1ab5b589ef9393e36b3b2b1a11797a706f3611 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 09:05:02 +0200 Subject: [PATCH 37/43] Prefer assert_allclose over assert_array_almost_equal Co-authored-by: Guillaume Lemaitre --- .../tests/test_feature_select.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 42627376dc2f1..ee34009352e8d 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -11,7 +11,6 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_warns from sklearn.utils._testing import ignore_warnings from sklearn.utils._testing import assert_warns_message @@ -60,8 +59,8 @@ def test_f_oneway_ints(): # test that is gives the same result as with float f, p = f_oneway(X.astype(float), 
                    y)
-    assert_array_almost_equal(f, fint, decimal=4)
-    assert_array_almost_equal(p, pint, decimal=4)
+    assert_allclose(f, fint)
+    assert_allclose(p, pint)
 
 
 def test_f_classif():
@@ -80,8 +79,8 @@ def test_f_classif():
     assert (pv < 1).all()
     assert (pv[:5] < 0.05).all()
     assert (pv[5:] > 1.e-4).all()
-    assert_array_almost_equal(F_sparse, F)
-    assert_array_almost_equal(pv_sparse, pv)
+    assert_allclose(F_sparse, F)
+    assert_allclose(pv_sparse, pv)
 
 
 @pytest.mark.parametrize("coeff", [abs_r_regression, r_regression])
@@ -97,12 +96,12 @@ def test_r_regression(coeff):
     # with centering, compare with sparse
     correlation_coeffs = coeff(X, y, center=True)
     correlation_coeffs_sparse = coeff(sparse.csr_matrix(X), y, center=True)
-    assert_array_almost_equal(correlation_coeffs_sparse, correlation_coeffs)
+    assert_allclose(correlation_coeffs_sparse, correlation_coeffs)
 
     # again without centering, compare with sparse
     correlation_coeffs = coeff(X, y, center=False)
     correlation_coeffs_sparse = coeff(sparse.csr_matrix(X), y, center=False)
-    assert_array_almost_equal(correlation_coeffs_sparse, correlation_coeffs)
+    assert_allclose(correlation_coeffs_sparse, correlation_coeffs)
 
 
 def test_f_regression():
@@ -121,14 +120,14 @@ def test_f_regression():
     # with centering, compare with sparse
     F, pv = f_regression(X, y, center=True)
     F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y, center=True)
-    assert_array_almost_equal(F_sparse, F)
-    assert_array_almost_equal(pv_sparse, pv)
+    assert_allclose(F_sparse, F)
+    assert_allclose(pv_sparse, pv)
 
     # again without centering, compare with sparse
     F, pv = f_regression(X, y, center=False)
     F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y, center=False)
-    assert_array_almost_equal(F_sparse, F)
-    assert_array_almost_equal(pv_sparse, pv)
+    assert_allclose(F_sparse, F)
+    assert_allclose(pv_sparse, pv)
 
 
 def test_f_regression_r_regression_consistency():
@@ -153,8 +152,8 @@ def test_f_regression_input_dtype():
     F1, pv1 = f_regression(X, y)
     F2, pv2 = f_regression(X, y.astype(float))
-    assert_array_almost_equal(F1, F2, 5)
-    assert_array_almost_equal(pv1, pv2, 5)
+    assert_allclose(F1, F2, rtol=1e-5)
+    assert_allclose(pv1, pv2, rtol=1e-5)
 
 
 def test_f_regression_center():
@@ -170,7 +169,7 @@ def test_f_regression_center():
     F1, _ = f_regression(X, Y, center=True)
     F2, _ = f_regression(X, Y, center=False)
 
-    assert_array_almost_equal(F1 * (n_samples - 1.) / (n_samples - 2.), F2)
+    assert_allclose(F1 * (n_samples - 1.)
/ (n_samples - 2.), F2) assert_almost_equal(F2[0], 0.232558139) # value from statsmodels OLS @@ -309,7 +308,7 @@ def test_select_heuristics_classif(): f_classif, mode=mode, param=0.01).fit(X, y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() - assert_array_almost_equal(support, gtruth) + assert_allclose(support, gtruth) ############################################################################## @@ -319,7 +318,7 @@ def test_select_heuristics_classif(): def assert_best_scores_kept(score_filter): scores = score_filter.scores_ support = score_filter.get_support() - assert_array_almost_equal(np.sort(scores[support]), + assert_allclose(np.sort(scores[support]), np.sort(scores)[-support.sum():]) @@ -442,8 +441,8 @@ def test_boundary_case_ch2(): X = np.array([[10, 20], [20, 20], [20, 30]]) y = np.array([[1], [0], [0]]) scores, pvalues = chi2(X, y) - assert_array_almost_equal(scores, np.array([4., 0.71428571])) - assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472])) + assert_allclose(scores, np.array([4., 0.71428571])) + assert_allclose(pvalues, np.array([0.04550026, 0.39802472])) filter_fdr = SelectFdr(chi2, alpha=0.1) filter_fdr.fit(X, y) From 6bda200de5bc5c26d9d8ee70f80b9e68772921d8 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 09:49:29 +0200 Subject: [PATCH 38/43] fixup! Prefer assert_allclose over assert_array_almost_equal Revert changes for unrelated tests. --- .../feature_selection/tests/test_feature_select.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ee34009352e8d..dedbf57a89417 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -11,6 +11,7 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_warns from sklearn.utils._testing import ignore_warnings from sklearn.utils._testing import assert_warns_message @@ -59,8 +60,8 @@ def test_f_oneway_ints(): # test that is gives the same result as with float f, p = f_oneway(X.astype(float), y) - assert_allclose(f, fint) - assert_allclose(p, pint) + assert_array_almost_equal(f, fint, decimal=4) + assert_array_almost_equal(p, pint, decimal=4) def test_f_classif(): @@ -79,8 +80,8 @@ def test_f_classif(): assert (pv < 1).all() assert (pv[:5] < 0.05).all() assert (pv[5:] > 1.e-4).all() - assert_allclose(F_sparse, F) - assert_allclose(pv_sparse, pv) + assert_array_almost_equal(F_sparse, F) + assert_array_almost_equal(pv_sparse, pv) @pytest.mark.parametrize("coeff", [abs_r_regression, r_regression]) @@ -441,8 +442,8 @@ def test_boundary_case_ch2(): X = np.array([[10, 20], [20, 20], [20, 30]]) y = np.array([[1], [0], [0]]) scores, pvalues = chi2(X, y) - assert_allclose(scores, np.array([4., 0.71428571])) - assert_allclose(pvalues, np.array([0.04550026, 0.39802472])) + assert_array_almost_equal(scores, np.array([4., 0.71428571])) + assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472])) filter_fdr = SelectFdr(chi2, alpha=0.1) filter_fdr.fit(X, y) From 76830bb0f6c646546219e418b7a392f9fa86cb7b Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 12 Apr 2021 16:34:37 +0200 Subject: [PATCH 39/43] Split test into several Also check against numpy's implementation of the 
correlation coefficient. Co-authored-by: Guillaume Lemaitre --- .../tests/test_feature_select.py | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index dedbf57a89417..ad51f1dc5da25 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -9,7 +9,7 @@ import pytest -from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_almost_equal, _convert_container from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_warns @@ -84,25 +84,35 @@ def test_f_classif(): assert_array_almost_equal(pv_sparse, pv) -@pytest.mark.parametrize("coeff", [abs_r_regression, r_regression]) -def test_r_regression(coeff): - X, y = make_regression(n_samples=200, n_features=20, n_informative=5, +@pytest.mark.parametrize("center", [True, False]) +def test_r_regression(center): + X, y = make_regression(n_samples=2000, n_features=20, n_informative=5, shuffle=False, random_state=0) - correlation_coeffs = coeff(X, y) - if coeff == r_regression: - assert ((correlation_coeffs > -1).all()) - assert ((correlation_coeffs < 1).all()) + corr_coeffs = r_regression(X, y, center=center) + assert ((-1 < corr_coeffs).all()) + assert ((corr_coeffs < 1).all()) - # with centering, compare with sparse - correlation_coeffs = coeff(X, y, center=True) - correlation_coeffs_sparse = coeff(sparse.csr_matrix(X), y, center=True) - assert_allclose(correlation_coeffs_sparse, correlation_coeffs) + sparse_X = _convert_container(X, "sparse") - # again without centering, compare with sparse - correlation_coeffs = coeff(X, y, center=False) - correlation_coeffs_sparse = coeff(sparse.csr_matrix(X), y, center=False) - assert_allclose(correlation_coeffs_sparse, correlation_coeffs) + sparse_corr_coeffs = r_regression(sparse_X, y, center=center) + assert_allclose(sparse_corr_coeffs, corr_coeffs) + + # Testing against numpy for reference + Z = np.hstack((X, y[:, np.newaxis])) + correlation_matrix = np.corrcoef(Z, rowvar=False) + np_corr_coeffs = correlation_matrix[:-1, -1] + assert_array_almost_equal(np_corr_coeffs, corr_coeffs, decimal=3) + + +@pytest.mark.parametrize("array_like", ["array", "sparse_csr", "sparse_csc"]) +def test_abs_r_regression(array_like): + X, y = make_regression(n_samples=200, n_features=20, n_informative=5, + shuffle=False, random_state=0) + + X = _convert_container(X, array_like) + + assert_allclose(abs_r_regression(X, y), np.abs(r_regression(X, y))) def test_f_regression(): From c83161be4f4c82e837eede2ac9cbc0f880d3946d Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 14 Apr 2021 11:16:44 +0200 Subject: [PATCH 40/43] fixup! Improve wording abs_r_regression --- sklearn/feature_selection/_univariate_selection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index d554ff5300ab2..ba5b9e8438fac 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -259,7 +259,7 @@ def r_regression(X, y, *, center=True): Returns ------- correlation_coefficient : ndarray of shape (n_features,) - Pearson R correlation coefficients of features. + Pearson's R correlation coefficients of features. 
See Also -------- @@ -344,7 +344,7 @@ def f_regression(X, y, *, center=True): -------- abs_r_regression: Absolute value of Pearson's R correlation coefficients between label and features for regression tasks. - r_regression: Pearson R between label/feature for regression tasks. + r_regression: Pearson's R between label/feature for regression tasks. f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. SelectKBest: Select features based on the k highest scores. From 3e4743c29362be51c208f8511906cbfadf2db49b Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 16 Apr 2021 10:26:27 +0200 Subject: [PATCH 41/43] Drop `feature_selection.abs_r_regression` See discussions: https://github.com/scikit-learn/scikit-learn/pull/17169#issuecomment-820997349 --- doc/modules/classes.rst | 1 - doc/whats_new/v1.0.rst | 7 +-- sklearn/feature_selection/__init__.py | 2 - .../_univariate_selection.py | 25 ----------- .../tests/test_feature_select.py | 43 ------------------- 5 files changed, 2 insertions(+), 76 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 195d63efa1def..d56914f874b42 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -557,7 +557,6 @@ From text :toctree: generated/ :template: function.rst - feature_selection.abs_r_regression feature_selection.chi2 feature_selection.f_classif feature_selection.f_regression diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index b0e8ca63e78f8..eaf02942cf316 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -106,11 +106,8 @@ Changelog :mod:`sklearn.feature_selection` ................................ -- |Feature| :func:`feature_selection.abs_r_regression` is a new criterion - which can be used with :class:`feature_selection.SelectKBest` to select - variables. It is the absolution values of - :func:`feature_selection.r_regression` which computes Pearson's R correlation - coefficients between the features and the target. +- |Feature| :func:`feature_selection.r_regression` computes Pearson's R + correlation coefficients between the features and the target. :pr:`17169` by `Dmytro Lituiev ` and `Julien Jerphanion `. diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index eae46cf21006d..ef894b40065de 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -9,7 +9,6 @@ from ._univariate_selection import f_oneway from ._univariate_selection import f_regression from ._univariate_selection import r_regression -from ._univariate_selection import abs_r_regression from ._univariate_selection import SelectPercentile from ._univariate_selection import SelectKBest from ._univariate_selection import SelectFpr @@ -42,7 +41,6 @@ 'SelectFromModel', 'SelectPercentile', 'VarianceThreshold', - 'abs_r_regression', 'chi2', 'f_classif', 'f_oneway', diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index ba5b9e8438fac..4cbe4500ffa16 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -263,8 +263,6 @@ def r_regression(X, y, *, center=True): See Also -------- - abs_r_regression: Absolute value of Pearson's R correlation coefficients - between label and features for regression tasks. 
f_regression: Univariate linear regression tests returning f-statistic and p-values mutual_info_regression: Mutual information for a continuous target. @@ -315,9 +313,6 @@ def f_regression(X, y, *, center=True): Note however that contrary to :func:`f_regression`, :func:`r_regression` values lie in [-1, 1] and can thus be negative. - Alternatively, :func:`abs_r_regression` can be used to rank features by - correlation magnitude instead. - Read more in the :ref:`User Guide `. Parameters @@ -342,8 +337,6 @@ def f_regression(X, y, *, center=True): See Also -------- - abs_r_regression: Absolute value of Pearson's R correlation coefficients - between label and features for regression tasks. r_regression: Pearson's R between label/feature for regression tasks. f_classif: ANOVA F-value between label/feature for classification tasks. chi2: Chi-squared stats of non-negative features for classification tasks. @@ -363,22 +356,6 @@ def f_regression(X, y, *, center=True): return f_statistic, p_values -def abs_r_regression(X, y, center=True): - """Absolute value of Pearson's R from univariate linear regressions. - - .. versionadded:: 1.0 - - This convenience wrapper is to be used with - :class:`~sklearn.feature_selection.SelectKBest`. - - See Also - -------- - r_regression: Univariate linear regression tests returning Pearson's R - correlation coefficient. - SelectKBest: Select features based on the k highest scores. - """ - return np.abs(r_regression(X, y, center=center)) - ###################################################################### # Base classes @@ -576,8 +553,6 @@ class SelectKBest(_BaseFilter): f_classif: ANOVA F-value between label/feature for classification tasks. mutual_info_classif: Mutual information for a discrete target. chi2: Chi-squared stats of non-negative features for classification tasks. - abs_r_regression: Absolute value of Pearson's R between label and features - for regression tasks. f_regression: F-value between label/feature for regression tasks. mutual_info_regression: Mutual information for a continuous target. 
SelectPercentile: Select features based on percentile of the highest diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ad51f1dc5da25..852c8228b2a76 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -19,7 +19,6 @@ from sklearn.datasets import make_classification, make_regression from sklearn.feature_selection import ( - abs_r_regression, chi2, f_classif, f_oneway, @@ -105,16 +104,6 @@ def test_r_regression(center): assert_array_almost_equal(np_corr_coeffs, corr_coeffs, decimal=3) -@pytest.mark.parametrize("array_like", ["array", "sparse_csr", "sparse_csc"]) -def test_abs_r_regression(array_like): - X, y = make_regression(n_samples=200, n_features=20, n_informative=5, - shuffle=False, random_state=0) - - X = _convert_container(X, array_like) - - assert_allclose(abs_r_regression(X, y), np.abs(r_regression(X, y))) - - def test_f_regression(): # Test whether the F test yields meaningful results # on a simple simulated regression problem @@ -141,19 +130,6 @@ def test_f_regression(): assert_allclose(pv_sparse, pv) -def test_f_regression_r_regression_consistency(): - # Test the equivalence of f_regression and abs_r_regression for variable - # selection using the returned values ordering - X, y = make_regression(n_samples=200, n_features=1000, - shuffle=False, random_state=0) - - Fs, _ = f_regression(X, y) - - assert_array_equal(Fs.argsort(), abs_r_regression(X, y).argsort()) - - assert_allclose(abs_r_regression(X, y), np.abs(r_regression(X, y))) - - def test_f_regression_input_dtype(): # Test whether f_regression returns the same value # for any numeric data_type @@ -408,25 +384,6 @@ def test_select_kbest_regression(): assert_array_equal(support, gtruth) -def test_select_kbest_abs_r_regression(): - # Test whether the relative univariate feature selection - # gets the correct items in a simple regression problem - # with the k best heuristic - X, y = make_regression(n_samples=200, n_features=20, n_informative=5, - shuffle=False, random_state=0, noise=10) - - univariate_filter = SelectKBest(abs_r_regression, k=5) - X_r = univariate_filter.fit(X, y).transform(X) - assert_best_scores_kept(univariate_filter) - X_r2 = GenericUnivariateSelect( - f_regression, mode='k_best', param=5).fit(X, y).transform(X) - assert_array_equal(X_r, X_r2) - support = univariate_filter.get_support() - gtruth = np.zeros(20) - gtruth[:5] = 1 - assert_array_equal(support, gtruth) - - def test_select_heuristics_regression(): # Test whether the relative univariate feature selection # gets the correct items in a simple regression problem From fc57caa5eb6f7cfe9d27cfa4c3cc2be07feaa08e Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 20 Apr 2021 18:54:40 +0200 Subject: [PATCH 42/43] Use multi-line docstring (PEP 257) for better integration with IDEs Co-authored-by: Olivier Grisel --- sklearn/feature_selection/_univariate_selection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 4cbe4500ffa16..fd7f7ac19cc29 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -230,8 +230,9 @@ def chi2(X, y): def r_regression(X, y, *, center=True): - """Compute Pearson's R correlation coefficients between the features and - the target. 
+ """Compute Pearson's r for each features and the target. + + Pearson's r is also known as the Pearson correlation coefficient. .. versionadded:: 1.0 From d153dda1fab7af9017b007e68769e4642978260b Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 20 Apr 2021 18:55:32 +0200 Subject: [PATCH 43/43] Rephrase docstring Co-authored-by: Olivier Grisel --- sklearn/feature_selection/_univariate_selection.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index fd7f7ac19cc29..7fc69a4b13cf2 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -309,10 +309,18 @@ def f_regression(X, y, *, center=True): std(y)) using r_regression function. 2. It is converted to an F score and then to a p-value. - If p-values are not needed, :func:`r_regression` can be used to rank - features as a slightly cheaper alternative to :func:`f_regression`. + :func:`f_regression` is derived from :func:`r_regression` and will rank + features in the same order if all the features are positively correlated + with the target. + Note however that contrary to :func:`f_regression`, :func:`r_regression` - values lie in [-1, 1] and can thus be negative. + values lie in [-1, 1] and can thus be negative. :func:`f_regression` is + therefore recommended as a feature selection criterion to identify + potentially predictive feature for a downstream classifier, irrespective of + the sign of the association with the target variable. + + Furthermore :func:`f_regression` returns p-values while + :func:`r_regression` does not. Read more in the :ref:`User Guide `.
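A minimal usage sketch of the API this patch series converges on (a sketch assuming scikit-learn >= 1.0, where `r_regression` landed with `center` keyword-only and `abs_r_regression` was dropped in PATCH 41; the `abs_r_scores` helper below is an illustrative stand-in, not a scikit-learn function):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.feature_selection import SelectKBest, f_regression, r_regression

    X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                           random_state=0)

    # Signed Pearson correlation coefficient per feature, each in [-1, 1].
    corr = r_regression(X, y)

    # F-statistic and p-values derived from the same correlations.
    f_statistic, p_values = f_regression(X, y)

    # Hypothetical replacement for the dropped abs_r_regression: a small
    # score_func wrapper that ranks features by correlation magnitude.
    def abs_r_scores(X, y):
        return np.abs(r_regression(X, y))

    X_top5 = SelectKBest(score_func=abs_r_scores, k=5).fit_transform(X, y)
    print(X_top5.shape)  # (200, 5)

Passing `r_regression` directly to `SelectKBest` would keep only the most positively correlated features, since the selector ranks raw scores; when the sign of the association does not matter, a magnitude-based wrapper such as the one above, or `f_regression` itself (whose F-statistic grows monotonically with the squared correlation), is the appropriate scorer.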