From eacdba892383b8db78660ebd3593f128d78227e8 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 25 Feb 2021 12:01:13 +0000 Subject: [PATCH 1/5] Change assert from sklearn to pytest style --- sklearn/tests/test_discriminant_analysis.py | 54 +++++++++++++-------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 18364ce156f87..4e129256f5e1e 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -5,14 +5,10 @@ from scipy import linalg from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_equal, assert_no_warnings +from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_allclose from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_raises -from sklearn.utils._testing import assert_raise_message -from sklearn.utils._testing import assert_warns -from sklearn.utils._testing import ignore_warnings from sklearn.datasets import make_blobs from sklearn.discriminant_analysis import LinearDiscriminantAnalysis @@ -89,15 +85,22 @@ def test_lda_predict(): # Test invalid shrinkages clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=-0.2231) - assert_raises(ValueError, clf.fit, X, y) + with pytest.raises(ValueError): + clf.fit(X, y) + clf = LinearDiscriminantAnalysis(solver="eigen", shrinkage="dummy") - assert_raises(ValueError, clf.fit, X, y) + with pytest.raises(ValueError): + clf.fit(X, y) + clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto") - assert_raises(NotImplementedError, clf.fit, X, y) + with pytest.raises(NotImplementedError): + clf.fit(X, y) + clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=np.array([1, 2])) with pytest.raises(TypeError, match="shrinkage must be a float or a string"): 
clf.fit(X, y) + clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=0.1, covariance_estimator=ShrunkCovariance()) @@ -106,9 +109,11 @@ def test_lda_predict(): "parameters are not None. " "Only one of the two can be set.")): clf.fit(X, y) + # Test unknown solver clf = LinearDiscriminantAnalysis(solver="dummy") - assert_raises(ValueError, clf.fit, X, y) + with pytest.raises(ValueError): + clf.fit(X, y) # test bad solver with covariance_estimator clf = LinearDiscriminantAnalysis(solver="svd", @@ -199,7 +204,10 @@ def test_lda_priors(): priors = np.array([0.5, -0.5]) clf = LinearDiscriminantAnalysis(priors=priors) msg = "priors must be non-negative" - assert_raise_message(ValueError, msg, clf.fit, X, y) + + with pytest.raises(ValueError, + match=msg): + clf.fit(X, y) # Test that priors passed as a list are correctly handled (run to see if # failure) @@ -210,7 +218,10 @@ def test_lda_priors(): priors = np.array([0.5, 0.6]) prior_norm = np.array([0.45, 0.55]) clf = LinearDiscriminantAnalysis(priors=priors) - assert_warns(UserWarning, clf.fit, X, y) + + with pytest.warns(UserWarning): + clf.fit(X, y) + assert_array_almost_equal(clf.priors_, prior_norm, 2) @@ -247,7 +258,10 @@ def test_lda_transform(): clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1) clf.fit(X, y) msg = "transform not implemented for 'lsqr'" - assert_raise_message(NotImplementedError, msg, clf.transform, X) + + with pytest.raises(NotImplementedError, + match=msg): + clf.transform(X) def test_lda_explained_variance_ratio(): @@ -424,7 +438,8 @@ def test_lda_dimension_warning(n_classes, n_features): for n_components in [max_components - 1, None, max_components]: # if n_components <= min(n_classes - 1, n_features), no warning lda = LinearDiscriminantAnalysis(n_components=n_components) - assert_no_warnings(lda.fit, X, y) + with pytest.warns(None): + lda.fit(X, y) for n_components in [max_components + 1, max(n_features, n_classes - 1) + 1]: @@ -486,7 +501,8 @@ def test_qda(): assert 
np.any(y_pred3 != y7) # Classes should have at least 2 elements - assert_raises(ValueError, clf.fit, X6, y4) + with pytest.raises(ValueError): + clf.fit(X6, y4) def test_qda_priors(): @@ -522,25 +538,23 @@ def test_qda_store_covariance(): ) +@pytest.mark.filterwarnings() def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable clf = QuadraticDiscriminantAnalysis() - with ignore_warnings(): - y_pred = clf.fit(X2, y6).predict(X2) + y_pred = clf.fit(X2, y6).predict(X2) assert np.any(y_pred != y6) # adding a little regularization fixes the problem clf = QuadraticDiscriminantAnalysis(reg_param=0.01) - with ignore_warnings(): - clf.fit(X2, y6) + clf.fit(X2, y6) y_pred = clf.predict(X2) assert_array_equal(y_pred, y6) # Case n_samples_in_a_class < n_features clf = QuadraticDiscriminantAnalysis(reg_param=0.1) - with ignore_warnings(): - clf.fit(X5, y5) + clf.fit(X5, y5) y_pred5 = clf.predict(X5) assert_array_equal(y_pred5, y5) From 0951129ee64b402c890a0bd1b98a25b9a8363a44 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 25 Feb 2021 17:49:50 +0000 Subject: [PATCH 2/5] Remove filter and warns instead. Add explanations. --- sklearn/tests/test_discriminant_analysis.py | 23 +++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 4e129256f5e1e..308253b90e4ed 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -538,24 +538,35 @@ def test_qda_store_covariance(): ) -@pytest.mark.filterwarnings() def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable + # UserWarning will raised due to the constant variable + # Runtime warning will be raised due to unregularized + # constant variable in the covariance matrix. 
clf = QuadraticDiscriminantAnalysis() - y_pred = clf.fit(X2, y6).predict(X2) + with (pytest.warns(UserWarning) and + pytest.warns(RuntimeWarning)): + y_pred = clf.fit(X2, y6) + y_pred = clf.predict(X2) assert np.any(y_pred != y6) # adding a little regularization fixes the problem + # No Runtime Warning will be raised with regularization + # But UserWarning will persist clf = QuadraticDiscriminantAnalysis(reg_param=0.01) - clf.fit(X2, y6) - y_pred = clf.predict(X2) + with pytest.warns(UserWarning): + clf.fit(X2, y6) + y_pred = clf.predict(X2) assert_array_equal(y_pred, y6) # Case n_samples_in_a_class < n_features + # UserWarning should persist + # No RuntimeWarning should be seen clf = QuadraticDiscriminantAnalysis(reg_param=0.1) - clf.fit(X5, y5) - y_pred5 = clf.predict(X5) + with pytest.warns(UserWarning): + clf.fit(X5, y5) + y_pred5 = clf.predict(X5) assert_array_equal(y_pred5, y5) From 4c0e574db3e1d2d8330f0ee36cdd000339cb3426 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Sat, 27 Feb 2021 20:25:13 +0000 Subject: [PATCH 3/5] Fix early lines breaks and write explicit warning msgs --- sklearn/tests/test_discriminant_analysis.py | 30 +++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 308253b90e4ed..37246a53d9f09 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -205,8 +207,7 @@ def test_lda_priors(): clf = LinearDiscriminantAnalysis(priors=priors) msg = "priors must be non-negative" - with pytest.raises(ValueError, - match=msg): + with pytest.raises(ValueError, match=msg): clf.fit(X, y) # Test that priors passed as a list are correctly handled (run to see if @@ -259,8 +260,7 @@ def test_lda_transform(): clf.fit(X, y) msg = "transform not implemented for 'lsqr'" - with pytest.raises(NotImplementedError, - match=msg): 
+ with pytest.raises(NotImplementedError, match=msg): clf.transform(X) @@ -541,32 +541,40 @@ def test_qda_store_covariance(): def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable + # UserWarning will raised due to the constant variable + constant_msg = re.escape( + "Variables are collinear" + ) + # Runtime warning will be raised due to unregularized # constant variable in the covariance matrix. + covariance_msg = re.escape( + "divide by zero encountered in lag" + ) clf = QuadraticDiscriminantAnalysis() - with (pytest.warns(UserWarning) and - pytest.warns(RuntimeWarning)): + with (pytest.warns(UserWarning, match=constant_msg) and + pytest.warns(RuntimeWarning, match=covariance_msg)): y_pred = clf.fit(X2, y6) - y_pred = clf.predict(X2) + y_pred = clf.predict(X2) assert np.any(y_pred != y6) # adding a little regularization fixes the problem # No Runtime Warning will be raised with regularization # But UserWarning will persist clf = QuadraticDiscriminantAnalysis(reg_param=0.01) - with pytest.warns(UserWarning): + with pytest.warns(UserWarning, match=constant_msg): clf.fit(X2, y6) - y_pred = clf.predict(X2) + y_pred = clf.predict(X2) assert_array_equal(y_pred, y6) # Case n_samples_in_a_class < n_features # UserWarning should persist # No RuntimeWarning should be seen clf = QuadraticDiscriminantAnalysis(reg_param=0.1) - with pytest.warns(UserWarning): + with pytest.warns(UserWarning, match=constant_msg): clf.fit(X5, y5) - y_pred5 = clf.predict(X5) + y_pred5 = clf.predict(X5) assert_array_equal(y_pred5, y5) From 3327e31e5e9af8907d4c4a146d250200e30334c7 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 1 Mar 2021 11:08:46 +0100 Subject: [PATCH 4/5] Fix warning checks for QDA without regularization --- sklearn/tests/test_discriminant_analysis.py | 44 ++++++++++----------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py 
b/sklearn/tests/test_discriminant_analysis.py index 37246a53d9f09..8c53bf9ed2567 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -539,40 +539,36 @@ def test_qda_store_covariance(): def test_qda_regularization(): - # the default is reg_param=0. and will cause issues - # when there is a constant variable + # The default is reg_param=0. and will cause issues when there is a + # constant variable. - # UserWarning will raised due to the constant variable - constant_msg = re.escape( - "Variables are collinear" - ) - - # Runtime warning will be raised due to unregularized - # constant variable in the covariance matrix. - covariance_msg = re.escape( - "divide by zero encountered in lag" - ) + # Fitting on data with constant variable triggers a UserWarning. + collinear_msg = "Variables are collinear" clf = QuadraticDiscriminantAnalysis() - with (pytest.warns(UserWarning, match=constant_msg) and - pytest.warns(RuntimeWarning, match=covariance_msg)): - y_pred = clf.fit(X2, y6) - y_pred = clf.predict(X2) + with pytest.warns(UserWarning, match=collinear_msg): + y_pred = clf.fit(X2, y6) + + # XXX: RuntimeWarning is also raised at predict time because of divisions + # by zero when the model is fit with a constant feature and without + # regularization: should this be considered a bug? Either make the fit-time + # message more informative, raise an exception instead of a warning in + # this case, or somehow change predict to avoid division by zero. + with pytest.warns(RuntimeWarning, match="divide by zero"): + y_pred = clf.predict(X2) assert np.any(y_pred != y6) - # adding a little regularization fixes the problem - # No Runtime Warning will be raised with regularization - # But UserWarning will persist + # Adding a little regularization fixes the division by zero at predict + # time. But UserWarning will persist at fit time.
clf = QuadraticDiscriminantAnalysis(reg_param=0.01) - with pytest.warns(UserWarning, match=constant_msg): + with pytest.warns(UserWarning, match=collinear_msg): clf.fit(X2, y6) y_pred = clf.predict(X2) assert_array_equal(y_pred, y6) - # Case n_samples_in_a_class < n_features - # UserWarning should persist - # No RuntimeWarning should be seen + # UserWarning should also be there for the n_samples_in_a_class < + # n_features case. clf = QuadraticDiscriminantAnalysis(reg_param=0.1) - with pytest.warns(UserWarning, match=constant_msg): + with pytest.warns(UserWarning, match=collinear_msg): clf.fit(X5, y5) y_pred5 = clf.predict(X5) assert_array_equal(y_pred5, y5) From d53da1b11f5ebe3ea619c847de4be8d45cfd603a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 1 Mar 2021 11:11:57 +0100 Subject: [PATCH 5/5] flake8 fixes --- sklearn/tests/test_discriminant_analysis.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 8c53bf9ed2567..3dd22e2154400 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -1,5 +1,3 @@ -import re - import numpy as np import pytest @@ -546,7 +544,7 @@ def test_qda_regularization(): collinear_msg = "Variables are collinear" clf = QuadraticDiscriminantAnalysis() with pytest.warns(UserWarning, match=collinear_msg): - y_pred = clf.fit(X2, y6) + y_pred = clf.fit(X2, y6) # XXX: RuntimeWarning is also raised at predict time because of divisions # by zero when the model is fit with a constant feature and without