From eacdba892383b8db78660ebd3593f128d78227e8 Mon Sep 17 00:00:00 2001
From: Alihan Zihna <a.zihna@ckhgbdp.onmicrosoft.com>
Date: Thu, 25 Feb 2021 12:01:13 +0000
Subject: [PATCH 1/5] Change assert from sklearn to pytest style

---
 sklearn/tests/test_discriminant_analysis.py | 54 +++++++++++++--------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index 18364ce156f87..4e129256f5e1e 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -5,14 +5,10 @@
 from scipy import linalg
 
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import assert_array_equal, assert_no_warnings
+from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_raises
-from sklearn.utils._testing import assert_raise_message
-from sklearn.utils._testing import assert_warns
-from sklearn.utils._testing import ignore_warnings
 
 from sklearn.datasets import make_blobs
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
@@ -89,15 +85,22 @@ def test_lda_predict():
 
     # Test invalid shrinkages
     clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=-0.2231)
-    assert_raises(ValueError, clf.fit, X, y)
+    with pytest.raises(ValueError):
+        clf.fit(X, y)
+
     clf = LinearDiscriminantAnalysis(solver="eigen", shrinkage="dummy")
-    assert_raises(ValueError, clf.fit, X, y)
+    with pytest.raises(ValueError):
+        clf.fit(X, y)
+
     clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
-    assert_raises(NotImplementedError, clf.fit, X, y)
+    with pytest.raises(NotImplementedError):
+        clf.fit(X, y)
+
     clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=np.array([1, 2]))
     with pytest.raises(TypeError,
                        match="shrinkage must be a float or a string"):
         clf.fit(X, y)
+
     clf = LinearDiscriminantAnalysis(solver="lsqr",
                                      shrinkage=0.1,
                                      covariance_estimator=ShrunkCovariance())
@@ -106,9 +109,11 @@ def test_lda_predict():
                               "parameters are not None. "
                               "Only one of the two can be set.")):
         clf.fit(X, y)
+
     # Test unknown solver
     clf = LinearDiscriminantAnalysis(solver="dummy")
-    assert_raises(ValueError, clf.fit, X, y)
+    with pytest.raises(ValueError):
+        clf.fit(X, y)
 
     # test bad solver with covariance_estimator
     clf = LinearDiscriminantAnalysis(solver="svd",
@@ -199,7 +204,10 @@ def test_lda_priors():
     priors = np.array([0.5, -0.5])
     clf = LinearDiscriminantAnalysis(priors=priors)
     msg = "priors must be non-negative"
-    assert_raise_message(ValueError, msg, clf.fit, X, y)
+
+    with pytest.raises(ValueError,
+                       match=msg):
+        clf.fit(X, y)
 
     # Test that priors passed as a list are correctly handled (run to see if
     # failure)
@@ -210,7 +218,10 @@ def test_lda_priors():
     priors = np.array([0.5, 0.6])
     prior_norm = np.array([0.45, 0.55])
     clf = LinearDiscriminantAnalysis(priors=priors)
-    assert_warns(UserWarning, clf.fit, X, y)
+
+    with pytest.warns(UserWarning):
+        clf.fit(X, y)
+
     assert_array_almost_equal(clf.priors_, prior_norm, 2)
 
 
@@ -247,7 +258,10 @@ def test_lda_transform():
     clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1)
     clf.fit(X, y)
     msg = "transform not implemented for 'lsqr'"
-    assert_raise_message(NotImplementedError, msg, clf.transform, X)
+
+    with pytest.raises(NotImplementedError,
+                       match=msg):
+        clf.transform(X)
 
 
 def test_lda_explained_variance_ratio():
@@ -424,7 +438,8 @@ def test_lda_dimension_warning(n_classes, n_features):
     for n_components in [max_components - 1, None, max_components]:
         # if n_components <= min(n_classes - 1, n_features), no warning
         lda = LinearDiscriminantAnalysis(n_components=n_components)
-        assert_no_warnings(lda.fit, X, y)
+        with pytest.warns(None):
+            lda.fit(X, y)
 
     for n_components in [max_components + 1,
                          max(n_features, n_classes - 1) + 1]:
@@ -486,7 +501,8 @@ def test_qda():
     assert np.any(y_pred3 != y7)
 
     # Classes should have at least 2 elements
-    assert_raises(ValueError, clf.fit, X6, y4)
+    with pytest.raises(ValueError):
+        clf.fit(X6, y4)
 
 
 def test_qda_priors():
@@ -522,25 +538,23 @@ def test_qda_store_covariance():
     )
 
 
+@pytest.mark.filterwarnings()
 def test_qda_regularization():
     # the default is reg_param=0. and will cause issues
     # when there is a constant variable
     clf = QuadraticDiscriminantAnalysis()
-    with ignore_warnings():
-        y_pred = clf.fit(X2, y6).predict(X2)
+    y_pred = clf.fit(X2, y6).predict(X2)
     assert np.any(y_pred != y6)
 
     # adding a little regularization fixes the problem
     clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
-    with ignore_warnings():
-        clf.fit(X2, y6)
+    clf.fit(X2, y6)
     y_pred = clf.predict(X2)
     assert_array_equal(y_pred, y6)
 
     # Case n_samples_in_a_class < n_features
     clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
-    with ignore_warnings():
-        clf.fit(X5, y5)
+    clf.fit(X5, y5)
     y_pred5 = clf.predict(X5)
     assert_array_equal(y_pred5, y5)
 

From 0951129ee64b402c890a0bd1b98a25b9a8363a44 Mon Sep 17 00:00:00 2001
From: Alihan Zihna <a.zihna@ckhgbdp.onmicrosoft.com>
Date: Thu, 25 Feb 2021 17:49:50 +0000
Subject: [PATCH 2/5] Remove filter and warns instead. Add explanations.

---
 sklearn/tests/test_discriminant_analysis.py | 23 +++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index 4e129256f5e1e..308253b90e4ed 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -538,24 +538,35 @@ def test_qda_store_covariance():
     )
 
 
-@pytest.mark.filterwarnings()
 def test_qda_regularization():
     # the default is reg_param=0. and will cause issues
     # when there is a constant variable
+    # UserWarning will raised due to the constant variable
+    # Runtime warning will be raised due to unregularized
+    # constant variable in the covariance matrix.
     clf = QuadraticDiscriminantAnalysis()
-    y_pred = clf.fit(X2, y6).predict(X2)
+    with (pytest.warns(UserWarning) and
+          pytest.warns(RuntimeWarning)):
+        y_pred = clf.fit(X2, y6)
+        y_pred = clf.predict(X2)
     assert np.any(y_pred != y6)
 
     # adding a little regularization fixes the problem
+    # No Runtime Warning will be raised with regularization
+    # But UserWarning will persist
     clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
-    clf.fit(X2, y6)
-    y_pred = clf.predict(X2)
+    with pytest.warns(UserWarning):
+        clf.fit(X2, y6)
+        y_pred = clf.predict(X2)
     assert_array_equal(y_pred, y6)
 
     # Case n_samples_in_a_class < n_features
+    # UserWarning should persist
+    # No RuntimeWarning should be seen
     clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
-    clf.fit(X5, y5)
-    y_pred5 = clf.predict(X5)
+    with pytest.warns(UserWarning):
+        clf.fit(X5, y5)
+        y_pred5 = clf.predict(X5)
     assert_array_equal(y_pred5, y5)
 
 

From 4c0e574db3e1d2d8330f0ee36cdd000339cb3426 Mon Sep 17 00:00:00 2001
From: Alihan Zihna <a.zihna@ckhgbdp.onmicrosoft.com>
Date: Sat, 27 Feb 2021 20:25:13 +0000
Subject: [PATCH 3/5] Fix early line breaks and write explicit warning msgs

---
 sklearn/tests/test_discriminant_analysis.py | 30 +++++++++++++--------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index 308253b90e4ed..37246a53d9f09 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 
 import pytest
@@ -205,8 +207,7 @@ def test_lda_priors():
     clf = LinearDiscriminantAnalysis(priors=priors)
     msg = "priors must be non-negative"
 
-    with pytest.raises(ValueError,
-                       match=msg):
+    with pytest.raises(ValueError, match=msg):
         clf.fit(X, y)
 
     # Test that priors passed as a list are correctly handled (run to see if
@@ -259,8 +260,7 @@ def test_lda_transform():
     clf.fit(X, y)
     msg = "transform not implemented for 'lsqr'"
 
-    with pytest.raises(NotImplementedError,
-                       match=msg):
+    with pytest.raises(NotImplementedError, match=msg):
         clf.transform(X)
 
 
@@ -541,32 +541,40 @@ def test_qda_store_covariance():
 def test_qda_regularization():
     # the default is reg_param=0. and will cause issues
     # when there is a constant variable
+
     # UserWarning will raised due to the constant variable
+    constant_msg = re.escape(
+        "Variables are collinear"
+    )
+
     # Runtime warning will be raised due to unregularized
     # constant variable in the covariance matrix.
+    covariance_msg = re.escape(
+        "divide by zero encountered in lag"
+    )
     clf = QuadraticDiscriminantAnalysis()
-    with (pytest.warns(UserWarning) and
-          pytest.warns(RuntimeWarning)):
+    with (pytest.warns(UserWarning, match=constant_msg) and
+          pytest.warns(RuntimeWarning, match=covariance_msg)):
         y_pred = clf.fit(X2, y6)
-        y_pred = clf.predict(X2)
+    y_pred = clf.predict(X2)
     assert np.any(y_pred != y6)
 
     # adding a little regularization fixes the problem
     # No Runtime Warning will be raised with regularization
     # But UserWarning will persist
     clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
-    with pytest.warns(UserWarning):
+    with pytest.warns(UserWarning, match=constant_msg):
         clf.fit(X2, y6)
-        y_pred = clf.predict(X2)
+    y_pred = clf.predict(X2)
     assert_array_equal(y_pred, y6)
 
     # Case n_samples_in_a_class < n_features
     # UserWarning should persist
     # No RuntimeWarning should be seen
     clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
-    with pytest.warns(UserWarning):
+    with pytest.warns(UserWarning, match=constant_msg):
         clf.fit(X5, y5)
-        y_pred5 = clf.predict(X5)
+    y_pred5 = clf.predict(X5)
     assert_array_equal(y_pred5, y5)
 
 

From 3327e31e5e9af8907d4c4a146d250200e30334c7 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 1 Mar 2021 11:08:46 +0100
Subject: [PATCH 4/5] Fix warning checks for QDA without regularization

---
 sklearn/tests/test_discriminant_analysis.py | 44 ++++++++++-----------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index 37246a53d9f09..8c53bf9ed2567 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -539,40 +539,36 @@ def test_qda_store_covariance():
 
 
 def test_qda_regularization():
-    # the default is reg_param=0. and will cause issues
-    # when there is a constant variable
+    # The default is reg_param=0. and will cause issues when there is a
+    # constant variable.
 
-    # UserWarning will raised due to the constant variable
-    constant_msg = re.escape(
-        "Variables are collinear"
-    )
-
-    # Runtime warning will be raised due to unregularized
-    # constant variable in the covariance matrix.
-    covariance_msg = re.escape(
-        "divide by zero encountered in lag"
-    )
+    # Fitting on data with a constant variable triggers a UserWarning.
+    collinear_msg = "Variables are collinear"
     clf = QuadraticDiscriminantAnalysis()
-    with (pytest.warns(UserWarning, match=constant_msg) and
-          pytest.warns(RuntimeWarning, match=covariance_msg)):
-        y_pred = clf.fit(X2, y6)
-    y_pred = clf.predict(X2)
+    with pytest.warns(UserWarning, match=collinear_msg):
+            y_pred = clf.fit(X2, y6)
+
+    # XXX: RuntimeWarning is also raised at predict time because of divisions
+    # by zero when the model is fit with a constant feature and without
+    # regularization: should this be considered a bug? Either make the fit-time
+    # message more informative, raise an exception instead of a warning in this
+    # case, or somehow change predict to avoid division by zero.
+    with pytest.warns(RuntimeWarning, match="divide by zero"):
+        y_pred = clf.predict(X2)
     assert np.any(y_pred != y6)
 
-    # adding a little regularization fixes the problem
-    # No Runtime Warning will be raised with regularization
-    # But UserWarning will persist
+    # Adding a little regularization fixes the division by zero at predict
+    # time. But UserWarning will persist at fit time.
     clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
-    with pytest.warns(UserWarning, match=constant_msg):
+    with pytest.warns(UserWarning, match=collinear_msg):
         clf.fit(X2, y6)
     y_pred = clf.predict(X2)
     assert_array_equal(y_pred, y6)
 
-    # Case n_samples_in_a_class < n_features
-    # UserWarning should persist
-    # No RuntimeWarning should be seen
+    # UserWarning should also be there for the n_samples_in_a_class <
+    # n_features case.
     clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
-    with pytest.warns(UserWarning, match=constant_msg):
+    with pytest.warns(UserWarning, match=collinear_msg):
         clf.fit(X5, y5)
     y_pred5 = clf.predict(X5)
     assert_array_equal(y_pred5, y5)

From d53da1b11f5ebe3ea619c847de4be8d45cfd603a Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 1 Mar 2021 11:11:57 +0100
Subject: [PATCH 5/5] flake8 fixes

---
 sklearn/tests/test_discriminant_analysis.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index 8c53bf9ed2567..3dd22e2154400 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -1,5 +1,3 @@
-import re
-
 import numpy as np
 
 import pytest
@@ -546,7 +544,7 @@ def test_qda_regularization():
     collinear_msg = "Variables are collinear"
     clf = QuadraticDiscriminantAnalysis()
     with pytest.warns(UserWarning, match=collinear_msg):
-            y_pred = clf.fit(X2, y6)
+        y_pred = clf.fit(X2, y6)
 
     # XXX: RuntimeWarning is also raised at predict time because of divisions
     # by zero when the model is fit with a constant feature and without