From 89ac17b0479568804cf66c3904fdbbf532b539dd Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 25 Mar 2021 09:27:04 +0000 Subject: [PATCH 01/12] Fix first half of tests --- sklearn/tests/test_naive_bayes.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index dcd4b07712357..382b5c51459ed 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -1,3 +1,4 @@ +import re import numpy as np import scipy.sparse @@ -118,7 +119,10 @@ def test_gnb_sample_weight(): def test_gnb_neg_priors(): """Test whether an error is raised in case of negative priors""" clf = GaussianNB(priors=np.array([-1., 2.])) - assert_raises(ValueError, clf.fit, X, y) + + msg = re.escape('Priors must be non-negative.') + with pytest.raises(ValueError, match=msg): + clf.fit(X, y) def test_gnb_priors(): @@ -146,13 +150,19 @@ def test_gnb_wrong_nb_priors(): """ Test whether an error is raised if the number of prior is different from the number of class""" clf = GaussianNB(priors=np.array([.25, .25, .25, .25])) - assert_raises(ValueError, clf.fit, X, y) + + msg = re.escape('Number of priors must match number of classes') + with pytest.raises(ValueError, match=msg): + clf.fit(X, y) def test_gnb_prior_greater_one(): """Test if an error is raised if the sum of prior greater than one""" clf = GaussianNB(priors=np.array([2., 1.])) - assert_raises(ValueError, clf.fit, X, y) + + msg = re.escape('The sum of the priors should be 1') + with pytest.raises(ValueError, match=msg): + clf.fit(X, y) def test_gnb_prior_large_bias(): @@ -339,9 +349,13 @@ def test_discretenb_provide_prior(DiscreteNaiveBayes): assert_array_almost_equal(prior, np.array([.5, .5])) # Inconsistent number of classes with prior - assert_raises(ValueError, clf.fit, [[0], [1], [2]], [0, 1, 2]) - assert_raises(ValueError, clf.partial_fit, [[0], [1]], [0, 1], - classes=[0, 1, 1]) + msg_1 = re.escape('Number of priors must match number of classes') + with pytest.raises(ValueError, match=msg_1): + clf.fit([[0], [1], [2]], [0, 1, 2]) + + msg_2 = re.escape('is not the same as on last call to partial_fit') + with pytest.raises(ValueError, match=msg_2): + clf.partial_fit([[0], [1]], [0, 1], classes=[0, 1, 1]) @pytest.mark.parametrize('DiscreteNaiveBayes', DISCRETE_NAIVE_BAYES_CLASSES) From 00eddc3bf18e0a032c0e1ac84d79f4fb27a5817d Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Fri, 26 Mar 2021 08:14:50 +0000 Subject: [PATCH 02/12] Fix the rest of the nb tests --- sklearn/tests/test_naive_bayes.py | 97 +++++++++++++++++++------------ 1 file changed, 60 insertions(+), 37 deletions(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 382b5c51459ed..4c60b8ca9bf95 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -12,10 +12,6 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_raises -from sklearn.utils._testing import assert_raise_message -from sklearn.utils._testing import assert_warns -from sklearn.utils._testing import assert_no_warnings from sklearn.utils._testing import ignore_warnings from sklearn.naive_bayes import GaussianNB, BernoulliNB @@ -484,7 +480,10 @@ def test_mnnb(kind): # Check the ability to predict the learning set. clf = MultinomialNB() - assert_raises(ValueError, clf.fit, -X, y2) + + msg = re.escape('Negative values in data passed to') + with pytest.raises(ValueError, match=msg): + clf.fit(-X, y2) y_pred = clf.fit(X, y2).predict(X) assert_array_equal(y_pred, y2) @@ -532,18 +531,16 @@ def test_mnb_prior_unobserved_targets(): clf = MultinomialNB() - assert_no_warnings( - clf.partial_fit, X, y, classes=[0, 1, 2] - ) + with pytest.warns(None): + clf.partial_fit(X, y, classes=[0, 1, 2]) assert clf.predict([[0, 1]]) == 0 assert clf.predict([[1, 0]]) == 1 assert clf.predict([[1, 1]]) == 0 # add a training example with previously unobserved class - assert_no_warnings( - clf.partial_fit, [[1, 1]], [2] - ) + with pytest.warns(None): + clf.partial_fit([[1, 1]], [2]) assert clf.predict([[0, 1]]) == 0 assert clf.predict([[1, 0]]) == 1 @@ -680,7 +677,10 @@ def test_cnb(): # Verify inputs are nonnegative. clf = ComplementNB(alpha=1.0) - assert_raises(ValueError, clf.fit, -X, Y) + + msg = re.escape('Negative values in data passed to ComplementNB (input X)') + with pytest.raises(ValueError, match=msg): + clf.fit(-X, Y) clf.fit(X, Y) @@ -714,9 +714,13 @@ def test_categoricalnb(): # Check error is raised for X with negative entries X = np.array([[0, -1]]) y = np.array([1]) - error_msg = "Negative values in data passed to CategoricalNB (input X)" - assert_raise_message(ValueError, error_msg, clf.predict, X) - assert_raise_message(ValueError, error_msg, clf.fit, X, y) + error_msg = re.escape( + "Negative values in data passed to CategoricalNB (input X)" + ) + with pytest.raises(ValueError, match=error_msg): + clf.predict(X) + with pytest.raises(ValueError, match=error_msg): + clf.fit(X, y) # Test alpha X3_test = np.array([[2, 5]]) @@ -808,52 +812,67 @@ def test_alpha(): X = np.array([[1, 0], [1, 1]]) y = np.array([0, 1]) nb = BernoulliNB(alpha=0.) - assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1]) - assert_warns(UserWarning, nb.fit, X, y) + msg = re.escape( + "alpha too small will result in numeric errors," + " setting alpha = 1.0e-10" + ) + with pytest.warns(UserWarning, match=msg): + nb.partial_fit(X, y, classes=[0, 1]) + with pytest.warns(UserWarning, match=msg): + nb.fit(X, y) prob = np.array([[1, 0], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) nb = MultinomialNB(alpha=0.) - assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1]) - assert_warns(UserWarning, nb.fit, X, y) + with pytest.warns(UserWarning, match=msg): + nb.partial_fit(X, y, classes=[0, 1]) + with pytest.warns(UserWarning, match=msg): + nb.fit(X, y) prob = np.array([[2. / 3, 1. / 3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) nb = CategoricalNB(alpha=0.) - assert_warns(UserWarning, nb.fit, X, y) + with pytest.warns(UserWarning, match=msg): + nb.fit(X, y) prob = np.array([[1., 0.], [0., 1.]]) assert_array_almost_equal(nb.predict_proba(X), prob) # Test sparse X X = scipy.sparse.csr_matrix(X) nb = BernoulliNB(alpha=0.) - assert_warns(UserWarning, nb.fit, X, y) + with pytest.warns(UserWarning, match=msg): + nb.fit(X, y) prob = np.array([[1, 0], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) nb = MultinomialNB(alpha=0.) - assert_warns(UserWarning, nb.fit, X, y) + with pytest.warns(UserWarning, match=msg): + nb.fit(X, y) prob = np.array([[2. / 3, 1. / 3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) # Test for alpha < 0 X = np.array([[1, 0], [1, 1]]) y = np.array([0, 1]) - expected_msg = ('Smoothing parameter alpha = -1.0e-01. ' - 'alpha should be > 0.') + expected_msg = re.escape( + 'Smoothing parameter alpha = -1.0e-01. alpha should be > 0.' + ) b_nb = BernoulliNB(alpha=-0.1) m_nb = MultinomialNB(alpha=-0.1) c_nb = CategoricalNB(alpha=-0.1) - assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y) - assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y) - assert_raise_message(ValueError, expected_msg, c_nb.fit, X, y) + with pytest.raises(ValueError, match=expected_msg): + b_nb.fit(X, y) + with pytest.raises(ValueError, match=expected_msg): + m_nb.fit(X, y) + with pytest.raises(ValueError, match=expected_msg): + c_nb.fit(X, y) b_nb = BernoulliNB(alpha=-0.1) m_nb = MultinomialNB(alpha=-0.1) - assert_raise_message(ValueError, expected_msg, b_nb.partial_fit, - X, y, classes=[0, 1]) - assert_raise_message(ValueError, expected_msg, m_nb.partial_fit, - X, y, classes=[0, 1]) + with pytest.raises(ValueError, match=expected_msg): + b_nb.partial_fit(X, y, classes=[0, 1]) + with pytest.raises(ValueError, match=expected_msg): + m_nb.partial_fit(X, y, classes=[0, 1]) def test_alpha_vector(): @@ -876,10 +895,12 @@ def test_alpha_vector(): # Test alpha non-negative alpha = np.array([1., -0.1]) - expected_msg = ('Smoothing parameter alpha = -1.0e-01. ' - 'alpha should be > 0.') m_nb = MultinomialNB(alpha=alpha) - assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y) + expected_msg = re.escape( + 'Smoothing parameter alpha = -1.0e-01. alpha should be > 0.' + ) + with pytest.raises(ValueError, match=expected_msg): + m_nb.fit(X, y) # Test that too small pseudo-counts are replaced ALPHA_MIN = 1e-10 @@ -893,9 +914,11 @@ def test_alpha_vector(): # Test correct dimensions alpha = np.array([1., 2., 3.]) m_nb = MultinomialNB(alpha=alpha) - expected_msg = ('alpha should be a scalar or a numpy array ' - 'with shape [n_features]') - assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y) + expected_msg = re.escape( + 'alpha should be a scalar or a numpy array with shape [n_features]' + ) + with pytest.raises(ValueError, match=expected_msg): + m_nb.fit(X, y) def test_check_accuracy_on_digits(): From 8dbd016479a9f60569861c7dd1f3409d57a63ca1 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 11:57:34 +0100 Subject: [PATCH 03/12] Fix message variable names --- sklearn/tests/test_naive_bayes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 4c60b8ca9bf95..5332c62c188ab 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -345,12 +345,12 @@ def test_discretenb_provide_prior(DiscreteNaiveBayes): assert_array_almost_equal(prior, np.array([.5, .5])) # Inconsistent number of classes with prior - msg_1 = re.escape('Number of priors must match number of classes') - with pytest.raises(ValueError, match=msg_1): + msg = re.escape('Number of priors must match number of classes') + with pytest.raises(ValueError, match=msg): clf.fit([[0], [1], [2]], [0, 1, 2]) - msg_2 = re.escape('is not the same as on last call to partial_fit') - with pytest.raises(ValueError, match=msg_2): + msg = re.escape('is not the same as on last call to partial_fit') + with pytest.raises(ValueError, match=msg): clf.partial_fit([[0], [1]], [0, 1], classes=[0, 1, 1]) From a1ce7dec1fbe1cdccb7fa506cc981258f302a1a9 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 11:59:16 +0100 Subject: [PATCH 04/12] Add check record length for pytest.warns --- sklearn/tests/test_naive_bayes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 5332c62c188ab..05279cf97f5f5 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -531,16 +531,18 @@ def test_mnb_prior_unobserved_targets(): clf = MultinomialNB() - with pytest.warns(None): + with pytest.warns(None) as record: clf.partial_fit(X, y, classes=[0, 1, 2]) + assert len(record) == 0 assert clf.predict([[0, 1]]) == 0 assert clf.predict([[1, 0]]) == 1 assert clf.predict([[1, 1]]) == 0 # add a training example with previously unobserved class - with pytest.warns(None): + with pytest.warns(None) as record: clf.partial_fit([[1, 1]], [2]) + assert len(record) == 0 assert clf.predict([[0, 1]]) == 0 assert clf.predict([[1, 0]]) == 1 From 79416a8c4c851a5354618e6cdae7fa20d49a2682 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:02:58 +0100 Subject: [PATCH 05/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 05279cf97f5f5..dd619f2d34196 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -116,7 +116,7 @@ def test_gnb_neg_priors(): """Test whether an error is raised in case of negative priors""" clf = GaussianNB(priors=np.array([-1., 2.])) - msg = re.escape('Priors must be non-negative.') + msg = 'Priors must be non-negative' with pytest.raises(ValueError, match=msg): clf.fit(X, y) From 9bf03eb03558f1d0e45704bf0e9811cd0d849cd4 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:03:04 +0100 Subject: [PATCH 06/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index dd619f2d34196..aca14ff253717 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -156,7 +156,7 @@ def test_gnb_prior_greater_one(): """Test if an error is raised if the sum of prior greater than one""" clf = GaussianNB(priors=np.array([2., 1.])) - msg = re.escape('The sum of the priors should be 1') + msg = 'The sum of the priors should be 1' with pytest.raises(ValueError, match=msg): clf.fit(X, y) From ebc220b20c6e6b32daba009a901b072e1f209222 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:03:10 +0100 Subject: [PATCH 07/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index aca14ff253717..998fdd6bd3898 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -349,7 +349,7 @@ def test_discretenb_provide_prior(DiscreteNaiveBayes): with pytest.raises(ValueError, match=msg): clf.fit([[0], [1], [2]], [0, 1, 2]) - msg = re.escape('is not the same as on last call to partial_fit') + msg = 'is not the same as on last call to partial_fit' with pytest.raises(ValueError, match=msg): clf.partial_fit([[0], [1]], [0, 1], classes=[0, 1, 1]) From 54f2746a359654b1b26b5ccc26b887f4ee799f06 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:03:27 +0100 Subject: [PATCH 08/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 998fdd6bd3898..7c33d3ee47fa6 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -481,7 +481,7 @@ def test_mnnb(kind): # Check the ability to predict the learning set. clf = MultinomialNB() - msg = re.escape('Negative values in data passed to') + msg = 'Negative values in data passed to' with pytest.raises(ValueError, match=msg): clf.fit(-X, y2) y_pred = clf.fit(X, y2).predict(X) From 5adafd89e51779c5115266037c5ad5bbd3015c06 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:03:34 +0100 Subject: [PATCH 09/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 7c33d3ee47fa6..4a62e837fd2ff 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -147,7 +147,7 @@ def test_gnb_wrong_nb_priors(): from the number of class""" clf = GaussianNB(priors=np.array([.25, .25, .25, .25])) - msg = re.escape('Number of priors must match number of classes') + msg = 'Number of priors must match number of classes' with pytest.raises(ValueError, match=msg): clf.fit(X, y) From dbd2a45546d8b380d77582eae1a05e5a55fd4c3e Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:03:43 +0100 Subject: [PATCH 10/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 4a62e837fd2ff..036125cc30805 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -345,7 +345,7 @@ def test_discretenb_provide_prior(DiscreteNaiveBayes): assert_array_almost_equal(prior, np.array([.5, .5])) # Inconsistent number of classes with prior - msg = re.escape('Number of priors must match number of classes') + msg = 'Number of priors must match number of classes' with pytest.raises(ValueError, match=msg): clf.fit([[0], [1], [2]], [0, 1, 2]) From 72da64b0de3c99ee54160230dab6cb691d340efa Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:06:15 +0100 Subject: [PATCH 11/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 036125cc30805..0093863e57877 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -898,7 +898,7 @@ def test_alpha_vector(): # Test alpha non-negative alpha = np.array([1., -0.1]) m_nb = MultinomialNB(alpha=alpha) - expected_msg = re.escape( + expected_msg = ( 'Smoothing parameter alpha = -1.0e-01. alpha should be > 0.' ) with pytest.raises(ValueError, match=expected_msg): From 79223e45a44fd7a05aec6f9be5ea1b27f65db573 Mon Sep 17 00:00:00 2001 From: Alihan Zihna Date: Thu, 1 Apr 2021 15:06:22 +0100 Subject: [PATCH 12/12] Update sklearn/tests/test_naive_bayes.py Co-authored-by: Thomas J. Fan --- sklearn/tests/test_naive_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 0093863e57877..251ba6698ab0f 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -814,7 +814,7 @@ def test_alpha(): X = np.array([[1, 0], [1, 1]]) y = np.array([0, 1]) nb = BernoulliNB(alpha=0.) - msg = re.escape( + msg = ( "alpha too small will result in numeric errors," " setting alpha = 1.0e-10" )