From e3002c5efc2a34defe65e017e2a0cc3b6f250c1a Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Fri, 20 Sep 2013 17:59:14 +0200
Subject: [PATCH] FIX #2372: non-shuffling StratifiedKFold implementation and
 updated tests

---
 doc/modules/cross_validation.rst                  |  27 ++-
 .../statistical_inference/model_selection.rst     |  15 +-
 doc/whats_new.rst                                 |   9 +-
 sklearn/cross_validation.py                       |  36 ++-
 sklearn/feature_selection/tests/test_rfe.py       |  24 +-
 sklearn/tests/test_cross_validation.py            | 222 +++++++++++++++---
 sklearn/tests/test_naive_bayes.py                 |  14 +-
 7 files changed, 261 insertions(+), 86 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 441fe445c691d..5f40613a0fa97 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -105,24 +105,24 @@ time)::
   >>> scores = cross_validation.cross_val_score(
   ...    clf, iris.data, iris.target, cv=5)
   ...
-  >>> scores  # doctest: +ELLIPSIS
-  array([ 1.  ...,  0.96...,  0.9 ...,  0.96...,  1.        ])
+  >>> scores  # doctest: +ELLIPSIS
+  array([ 0.96...,  1.  ...,  0.96...,  0.96...,  1.        ])

 The mean score and the standard deviation of the score estimate are hence given
 by::

   >>> print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
-  Accuracy: 0.97 (+/- 0.07)
+  Accuracy: 0.98 (+/- 0.03)

 By default, the score computed at each CV iteration is the ``score``
 method of the estimator. It is possible to change this by using the
 scoring parameter::

   >>> from sklearn import metrics
-  >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=5,
-  ...     scoring='f1')
-  ...  # doctest: +ELLIPSIS
-  array([ 1.  ...,  0.96...,  0.89...,  0.96...,  1.        ])
+  >>> scores = cross_validation.cross_val_score(clf, iris.data, iris.target,
+  ...     cv=5, scoring='f1')
+  >>> scores  # doctest: +ELLIPSIS
+  array([ 0.96...,  1.  ...,  0.96...,  0.96...,  1.        ])

 See :ref:`scoring_parameter` for details. In the case of the Iris dataset, the
 samples are balanced across target
@@ -197,17 +197,18 @@ Stratified k-fold
 folds: each set contains approximately the same percentage of samples of each
 target class as the complete set.

-Example of stratified 2-fold cross-validation on a dataset with 7 samples from
-two unbalanced classes::
+Example of stratified 3-fold cross-validation on a dataset with 10 samples from
+two slightly unbalanced classes::

   >>> from sklearn.cross_validation import StratifiedKFold
-  >>> labels = [0, 0, 0, 1, 1, 1, 0]
-  >>> skf = StratifiedKFold(labels, 2)
+  >>> labels = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
+  >>> skf = StratifiedKFold(labels, 3)
   >>> for train, test in skf:
   ...     print("%s %s" % (train, test))
-  [1 4 6] [0 2 3 5]
-  [0 2 3 5] [1 4 6]
+  [2 3 6 7 8 9] [0 1 4 5]
+  [0 1 3 4 5 8 9] [2 6 7]
+  [0 1 2 4 5 6 7] [3 8 9]


 Leave-One-Out - LOO
diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst
index b069c31d5ec69..e7c898a6d9e46 100644
--- a/doc/tutorial/statistical_inference/model_selection.rst
+++ b/doc/tutorial/statistical_inference/model_selection.rst
@@ -143,12 +143,12 @@ estimator during the construction and exposes an estimator API::
     >>> gammas = np.logspace(-6, -1, 10)
     >>> clf = GridSearchCV(estimator=svc, param_grid=dict(gamma=gammas),
     ...                    n_jobs=-1)
-    >>> clf.fit(X_digits[:1000], y_digits[:1000])  # doctest: +ELLIPSIS
+    >>> clf.fit(X_digits[:1000], y_digits[:1000])  # doctest: +ELLIPSIS
     GridSearchCV(cv=None,...
-    >>> clf.best_score_  # doctest: +ELLIPSIS
-    0.9889...
-    >>> clf.best_estimator_.gamma
-    9.9999999999999995e-07
+    >>> clf.best_score_  # doctest: +ELLIPSIS
+    0.924...
+    >>> clf.best_estimator_.gamma == 1e-6
+    True

     >>> # Prediction performance on test set is not as good as on train set
     >>> clf.score(X_digits[1000:], y_digits[1000:])
@@ -163,8 +163,9 @@ a stratified 3-fold.

 ::

-    >>> cross_validation.cross_val_score(clf, X_digits, y_digits)
-    array([ 0.97996661,  0.98163606,  0.98330551])
+    >>> cross_validation.cross_val_score(clf, X_digits, y_digits)
+    ...  # doctest: +ELLIPSIS
+    array([ 0.935...,  0.958...,  0.937...])

 Two cross-validation loops are performed in parallel: one by the
 :class:`GridSearchCV` estimator to set `gamma` and the other one by
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 39a55d31353ce..620adef0edc24 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -44,6 +44,11 @@ Changelog
    - Memory improvements of extra trees and random forest by `Arnaud Joly`_.

+   - Changed :class:`cross_validation.StratifiedKFold` to try and
+     preserve as much of the original ordering of samples as possible so as
+     not to hide overfitting on datasets with a non-negligible level of
+     samples dependency.
+     By `Daniel Nouri`_ and `Olivier Grisel`_.

 API changes summary
 -------------------
@@ -781,7 +786,7 @@ List of contributors for release 0.13 by number of commits.
  * 17 `Fabian Pedregosa`_
  * 17 Nelle Varoquaux
  * 16 `Christian Osendorfer`_
- * 14 Daniel Nouri
+ * 14 `Daniel Nouri`_
  * 13 `Virgile Fritsch`_
  * 13 syhw
  * 12 `Satrajit Ghosh`_
@@ -2288,3 +2293,5 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.

 .. _Kyle Kastner: http://kastnerkyle.github.io
 .. _@FedericoV: https://github.com/FedericoV/
+
+.. _Daniel Nouri: http://danielnouri.org
\ No newline at end of file
diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py
index d08f76f975a30..a4e1763a6684f 100644
--- a/sklearn/cross_validation.py
+++ b/sklearn/cross_validation.py
@@ -9,6 +9,7 @@
 # License: BSD 3 clause

 from __future__ import print_function
+from __future__ import division

 import warnings
 from itertools import chain, combinations
@@ -375,21 +376,42 @@ class StratifiedKFold(_BaseKFold):

     def __init__(self, y, n_folds=3, indices=None):
         super(StratifiedKFold, self).__init__(len(y), n_folds, indices)
         y = np.asarray(y)
-        _, y_sorted = unique(y, return_inverse=True)
-        min_labels = np.min(np.bincount(y_sorted))
+        n_samples = y.shape[0]
+        unique_labels, y_inversed = unique(y, return_inverse=True)
+        label_counts = np.bincount(y_inversed)
+        min_labels = np.min(label_counts)
         if self.n_folds > min_labels:
             warnings.warn(("The least populated class in y has only %d"
                            " members, which is too few. The minimum"
                            " number of labels for any class cannot"
                            " be less than n_folds=%d."
                           % (min_labels, self.n_folds)), Warning)
+
+        # pre-assign each sample to a test fold index using individual KFold
+        # splitting strategies for each label so as to respect the
+        # balance of labels
+        per_label_cvs = [KFold(max(c, self.n_folds), self.n_folds)
+                         for c in label_counts]
+        test_folds = np.zeros(n_samples, dtype=np.int)
+        for test_fold_idx, per_label_splits in enumerate(zip(*per_label_cvs)):
+            for label, (_, test_split) in zip(unique_labels, per_label_splits):
+                label_test_folds = test_folds[y == label]
+                # the test split can be too big because we used
+                # KFold(max(c, self.n_folds), self.n_folds) instead of
+                # KFold(c, self.n_folds) to make it possible to not crash even
+                # if the data is not 100% stratifiable for all the labels
+                # (we use a warning instead of raising an exception)
+                # If this is the case, let's trim it:
+                test_split = test_split[test_split < len(label_test_folds)]
+                label_test_folds[test_split] = test_fold_idx
+                test_folds[y == label] = label_test_folds
+
+        self.test_folds = test_folds
         self.y = y

-    def _iter_test_indices(self):
-        n_folds = self.n_folds
-        idx = np.argsort(self.y)
-        for i in range(n_folds):
-            yield idx[i::n_folds]
+    def _iter_test_masks(self):
+        for i in range(self.n_folds):
+            yield self.test_folds == i

     def __repr__(self):
         return '%s.%s(labels=%s, n_folds=%i)' % (
diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index 853634ca3c700..220e667406ee4 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -69,39 +69,35 @@ def test_rfecv():
     y = list(iris.target)   # regression test: list should be supported

     # Test using the score function
-    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3)
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
     rfecv.fit(X, y)
     # non-regression test for missing worst feature:
     assert_equal(len(rfecv.grid_scores_), X.shape[1])
     assert_equal(len(rfecv.ranking_), X.shape[1])
     X_r = rfecv.transform(X)

+    # All the noisy variables were filtered out
+    assert_array_equal(X_r, iris.data)
+
     # same in sparse
-    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3)
+    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
     X_sparse = sparse.csr_matrix(X)
     rfecv_sparse.fit(X_sparse, y)
     X_r_sparse = rfecv_sparse.transform(X_sparse)
-
-    assert_equal(X_r.shape, iris.data.shape)
-    assert_array_almost_equal(X_r[:10], iris.data[:10])
-    assert_array_almost_equal(X_r_sparse.toarray(), X_r)
+    assert_array_equal(X_r_sparse.toarray(), iris.data)

     # Test using a customized loss function
-    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3,
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5,
                   loss_func=zero_one_loss)
     with warnings.catch_warnings(record=True):
         rfecv.fit(X, y)
     X_r = rfecv.transform(X)
-
-    assert_equal(X_r.shape, iris.data.shape)
-    assert_array_almost_equal(X_r[:10], iris.data[:10])
+    assert_array_equal(X_r, iris.data)

     # Test using a scorer
     scorer = SCORERS['accuracy']
-    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3,
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5,
                   scoring=scorer)
     rfecv.fit(X, y)
     X_r = rfecv.transform(X)
-
-    assert_equal(X_r.shape, iris.data.shape)
-    assert_array_almost_equal(X_r[:10], iris.data[:10])
+    assert_array_equal(X_r, iris.data)
diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py
index 559bada01b70e..986624137f461 100644
--- a/sklearn/tests/test_cross_validation.py
+++ b/sklearn/tests/test_cross_validation.py
@@ -1,5 +1,5 @@
 """Test the cross_validation module"""
-
+from __future__ import division
 import warnings

 import numpy as np
@@ -22,6 +22,7 @@ from sklearn import cross_validation as cval
 from sklearn.base import BaseEstimator
 from sklearn.datasets import make_regression
+from sklearn.datasets import load_digits
 from sklearn.datasets import load_iris
 from sklearn.metrics import accuracy_score
 from sklearn.metrics import f1_score
@@ -91,6 +92,37 @@ def score(self, X=None, Y=None):
 ##############################################################################
 # Tests

+def check_valid_split(train, test, n_samples=None):
+    # Use python sets to get more informative assertion failure messages
+    train, test = set(train), set(test)
+
+    # Train and test split should not overlap
+    assert_equal(train.intersection(test), set())
+
+    if n_samples is not None:
+        # Check that the union of train and test split covers all the indices
+        assert_equal(train.union(test), set(range(n_samples)))
+
+
+def check_cv_coverage(cv, expected_n_iter=None, n_samples=None):
+    # Check that all the samples appear at least once in a test fold
+    if expected_n_iter is not None:
+        assert_equal(len(cv), expected_n_iter)
+    else:
+        expected_n_iter = len(cv)
+
+    collected_test_samples = set()
+    iterations = 0
+    for train, test in cv:
+        check_valid_split(train, test, n_samples=n_samples)
+        iterations += 1
+        collected_test_samples.update(test)
+
+    # Check that the accumulated test samples cover the whole dataset
+    assert_equal(iterations, expected_n_iter)
+    if n_samples is not None:
+        assert_equal(collected_test_samples, set(range(n_samples)))
+

 def test_kfold_valueerrors():
     # Check that errors are raised if there is not enough samples
@@ -100,8 +132,8 @@ def test_kfold_valueerrors():
     # members.
     with warnings.catch_warnings(record=True) as w:
         warnings.simplefilter('always')
-        y = [0, 0, 1, 1, 2]
-        cval.StratifiedKFold(y, 3)
+        y = [3, 3, -1, -1, 2]
+        cv = cval.StratifiedKFold(y, 3)
     # checking there was only one warning.
     assert_equal(len(w), 1)
     # checking it has the right type
@@ -110,6 +142,11 @@
     # a characteristic of the code and not a behavior
     assert_true("The least populated class" in str(w[0]))

+    # Check that despite the warning the folds are still computed even
+    # though all the classes are not necessarily represented on each
+    # side of each split
+    check_cv_coverage(cv, expected_n_iter=3, n_samples=len(y))
+
     # Error when number of folds is <= 1
     assert_raises(ValueError, cval.KFold, 2, 0)
     assert_raises(ValueError, cval.KFold, 2, 1)
@@ -127,15 +164,72 @@ def test_kfold_valueerrors():

 def test_kfold_indices():
     # Check all indices are returned in the test folds
     kf = cval.KFold(300, 3)
-    all_folds = None
-    for train, test in kf:
-        if all_folds is None:
-            all_folds = test.copy()
-        else:
-            all_folds = np.concatenate((all_folds, test))
-
-    all_folds.sort()
-    assert_array_equal(all_folds, np.arange(300))
+    check_cv_coverage(kf, expected_n_iter=3, n_samples=300)
+
+    # Check all indices are returned in the test folds even when equal-sized
+    # folds are not possible
+    kf = cval.KFold(17, 3)
+    check_cv_coverage(kf, expected_n_iter=3, n_samples=17)
+
+
+def test_kfold_no_shuffle():
+    # Manually check that KFold preserves the data ordering on toy datasets
+    splits = iter(cval.KFold(4, 2))
+    train, test = splits.next()
+    assert_array_equal(test, [0, 1])
+    assert_array_equal(train, [2, 3])
+
+    train, test = splits.next()
+    assert_array_equal(test, [2, 3])
+    assert_array_equal(train, [0, 1])
+
+    splits = iter(cval.KFold(5, 2))
+    train, test = splits.next()
+    assert_array_equal(test, [0, 1, 2])
+    assert_array_equal(train, [3, 4])
+
+    train, test = splits.next()
+    assert_array_equal(test, [3, 4])
+    assert_array_equal(train, [0, 1, 2])
+
+
+def test_stratified_kfold_no_shuffle():
+    # Manually check that StratifiedKFold preserves the data ordering as much
+    # as possible on toy datasets in order to avoid hiding sample dependencies
+    # when possible
+    splits = iter(cval.StratifiedKFold([1, 1, 0, 0], 2))
+    train, test = splits.next()
+    assert_array_equal(test, [0, 2])
+    assert_array_equal(train, [1, 3])
+
+    train, test = splits.next()
+    assert_array_equal(test, [1, 3])
+    assert_array_equal(train, [0, 2])
+
+    splits = iter(cval.StratifiedKFold([1, 1, 1, 0, 0, 0, 0], 2))
+    train, test = splits.next()
+    assert_array_equal(test, [0, 1, 3, 4])
+    assert_array_equal(train, [2, 5, 6])
+
+    train, test = splits.next()
+    assert_array_equal(test, [2, 5, 6])
+    assert_array_equal(train, [0, 1, 3, 4])
+
+
+def test_stratified_kfold_ratios():
+    # Check that stratified kfold preserves label ratios in individual splits
+    n_samples = 1000
+    labels = np.array([4] * int(0.10 * n_samples) +
+                      [0] * int(0.89 * n_samples) +
+                      [1] * int(0.01 * n_samples))
+
+    for train, test in cval.StratifiedKFold(labels, 5):
+        assert_almost_equal(np.sum(labels[train] == 4) / len(train), 0.10, 2)
+        assert_almost_equal(np.sum(labels[train] == 0) / len(train), 0.89, 2)
+        assert_almost_equal(np.sum(labels[train] == 1) / len(train), 0.01, 2)
+        assert_almost_equal(np.sum(labels[test] == 4) / len(test), 0.10, 2)
+        assert_almost_equal(np.sum(labels[test] == 0) / len(test), 0.89, 2)
+        assert_almost_equal(np.sum(labels[test] == 1) / len(test), 0.01, 2)


 def test_kfold_balance():
@@ -149,30 +243,84 @@ def test_kfold_balance():
     assert_equal(np.sum(sizes), kf.n)


-@ignore_warnings
+def test_stratifiedkfold_balance():
+    # Check that StratifiedKFold returns folds with balanced sizes (only
+    # when stratification is possible)
+    labels = [0] * 3 + [1] * 14
+    for skf in [cval.StratifiedKFold(labels[:i], 3) for i in range(11, 17)]:
+        sizes = []
+        for _, test in skf:
+            sizes.append(len(test))
+
+        assert_true((np.max(sizes) - np.min(sizes)) <= 1)
+        assert_equal(np.sum(sizes), skf.n)
+
+
 def test_shuffle_kfold():
     # Check the indices are shuffled properly, and that all indices are
     # returned in the different test folds
-    kf1 = cval.KFold(300, 3, shuffle=True, random_state=0, indices=True)
-    kf2 = cval.KFold(300, 3, shuffle=True, random_state=0, indices=False)
+    kf = cval.KFold(300, 3, shuffle=True, random_state=0)
     ind = np.arange(300)

-    for kf in (kf1, kf2):
-        all_folds = None
-        for train, test in kf:
-            sorted_array = np.arange(100)
-            assert_true(np.any(sorted_array != ind[train]))
-            sorted_array = np.arange(101, 200)
-            assert_true(np.any(sorted_array != ind[train]))
-            sorted_array = np.arange(201, 300)
-            assert_true(np.any(sorted_array != ind[train]))
-            if all_folds is None:
-                all_folds = ind[test].copy()
-            else:
-                all_folds = np.concatenate((all_folds, ind[test]))
+    all_folds = None
+    for train, test in kf:
+        sorted_array = np.arange(100)
+        assert_true(np.any(sorted_array != ind[train]))
+        sorted_array = np.arange(101, 200)
+        assert_true(np.any(sorted_array != ind[train]))
+        sorted_array = np.arange(201, 300)
+        assert_true(np.any(sorted_array != ind[train]))
+        if all_folds is None:
+            all_folds = ind[test].copy()
+        else:
+            all_folds = np.concatenate((all_folds, ind[test]))
+
+    all_folds.sort()
+    assert_array_equal(all_folds, ind)
+
+
+def test_kfold_can_detect_dependent_samples_on_digits():  # see #2372
+    # The digits samples are dependent: they are apparently grouped by authors
+    # although we don't have any information on the groups segment locations
+    # for this data. We can highlight this fact by computing k-fold cross-
+    # validation with and without shuffling: we observe that the shuffling case
+    # wrongly makes the IID assumption and is therefore too optimistic: it
+    # estimates a much higher accuracy (around 0.96) than the
+    # non-shuffling variant (around 0.86).
+
+    digits = load_digits()
+    X, y = digits.data[:800], digits.target[:800]
+    model = SVC(C=10, gamma=0.005)
+    n = len(y)
+
+    cv = cval.KFold(n, 5, shuffle=False)
+    mean_score = cval.cross_val_score(model, X, y, cv=cv).mean()
+    assert_greater(0.88, mean_score)
+    assert_greater(mean_score, 0.85)
+
+    # Shuffling the data artificially breaks the dependency and hides the
+    # overfitting of the model w.r.t. the writing style of the authors
+    # by yielding a seriously overestimated score:
+
+    cv = cval.KFold(n, 5, shuffle=True, random_state=0)
+    mean_score = cval.cross_val_score(model, X, y, cv=cv).mean()
+    assert_greater(mean_score, 0.95)
+
+    cv = cval.KFold(n, 5, shuffle=True, random_state=1)
+    mean_score = cval.cross_val_score(model, X, y, cv=cv).mean()
+    assert_greater(mean_score, 0.95)
+
+    # Similarly, StratifiedKFold should try to shuffle the data as little
+    # as possible (while respecting the balanced class constraints)
+    # and thus be able to detect the dependency by not overestimating
+    # the CV score either. As the digits dataset is approximately balanced
+    # the estimated mean score is close to the score measured with
+    # non-shuffled KFold

-        all_folds.sort()
-        assert_array_equal(all_folds, ind)
+    cv = cval.StratifiedKFold(y, 5)
+    mean_score = cval.cross_val_score(model, X, y, cv=cv).mean()
+    assert_greater(0.88, mean_score)
+    assert_greater(mean_score, 0.85)


 def test_shuffle_split():
@@ -379,24 +527,24 @@ def test_cross_val_score_with_score_func_classification():

     # Default score (should be the accuracy score)
     scores = cval.cross_val_score(clf, iris.data, iris.target, cv=5)
-    assert_array_almost_equal(scores, [1., 0.97, 0.90, 0.97, 1.], 2)
+    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

     # Correct classification score (aka. zero / one score) - should be the
     # same as the default estimator score
     zo_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                      scoring="accuracy", cv=5)
-    assert_array_almost_equal(zo_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
+    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

     # F1 score (class are balanced so f1_score should be equal to zero/one
     # score
     f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                      scoring="f1", cv=5)
-    assert_array_almost_equal(f1_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
+    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

     # also test deprecated old way
     with warnings.catch_warnings(record=True):
         f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                          score_func=f1_score, cv=5)
-    assert_array_almost_equal(f1_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
+    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)


 def test_cross_val_score_with_score_func_regression():
@@ -450,7 +598,7 @@ def test_permutation_score():
     score_label, _, pvalue_label = cval.permutation_test_score(
         svm, X, y, scoring=scorer, cv=cv, labels=np.ones(y.size),
         random_state=0)
-    assert_almost_equal(score_label, .95, 2)
+    assert_almost_equal(score_label, .97, 2)
     assert_almost_equal(pvalue_label, 0.01, 3)

     # check that we obtain the same results with a sparse representation
@@ -470,14 +618,14 @@ def test_permutation_score():
                                                      scoring="accuracy")

     assert_less(score, 0.5)
-    assert_greater(pvalue, 0.4)
+    assert_greater(pvalue, 0.2)

     # test with deprecated interface
     with warnings.catch_warnings(record=True):
         score, scores, pvalue = cval.permutation_test_score(
             svm, X, y, score_func=accuracy_score, cv=cv)
     assert_less(score, 0.5)
-    assert_greater(pvalue, 0.4)
+    assert_greater(pvalue, 0.2)


 def test_cross_val_generator_with_mask():
diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py
index 77634726a386a..5ff2953896091 100644
--- a/sklearn/tests/test_naive_bayes.py
+++ b/sklearn/tests/test_naive_bayes.py
@@ -329,8 +329,8 @@ def test_coef_intercept_shape():

 def test_check_accuracy_on_digits():
     # Non regression test to make sure that any further refactoring / optim
-    # of the NB models do not harm the performance on a non linearly separable
-    # dataset
+    # of the NB models do not harm the performance on a slightly non-linearly
+    # separable dataset
     digits = load_digits()
     X, y = digits.data, digits.target
     binary_3v8 = np.logical_or(digits.target == 3, digits.target == 8)
@@ -338,21 +338,21 @@ def test_check_accuracy_on_digits():
     X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8]

     # Multinomial NB
     scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10)
-    assert_greater(scores.mean(), 0.90)
+    assert_greater(scores.mean(), 0.86)

     scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10)
-    assert_greater(scores.mean(), 0.95)
+    assert_greater(scores.mean(), 0.94)

     # Bernoulli NB
     scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
-    assert_greater(scores.mean(), 0.85)
+    assert_greater(scores.mean(), 0.83)

     scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10)
-    assert_greater(scores.mean(), 0.94)
+    assert_greater(scores.mean(), 0.92)

     # Gaussian NB
     scores = cross_val_score(GaussianNB(), X, y, cv=10)
-    assert_greater(scores.mean(), 0.81)
+    assert_greater(scores.mean(), 0.77)

     scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
     assert_greater(scores.mean(), 0.86)
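For readers skimming the patch, the heart of the new non-shuffling StratifiedKFold is the
pre-assignment of a test fold index to every sample: each class is cut into n_folds contiguous
chunks by a per-label KFold, and the chunk number becomes the sample's test fold, so the original
ordering of the data is preserved instead of being sorted by label. The snippet below is a minimal
standalone sketch of that idea, assuming every class has at least n_folds members; it is not the
scikit-learn implementation, and the helper name stratified_test_folds is made up for illustration.
On the labels example from the updated doc/modules/cross_validation.rst it reproduces the three
train/test splits shown there.

    # Minimal sketch only -- assumes each class has at least n_folds members;
    # not the scikit-learn code, names are illustrative.
    import numpy as np


    def stratified_test_folds(y, n_folds=3):
        """Map each sample to a test fold index while preserving sample order."""
        y = np.asarray(y)
        test_folds = np.empty(len(y), dtype=int)
        for label in np.unique(y):
            # Positions of this class, in their original order.
            label_positions = np.where(y == label)[0]
            # Cut them into n_folds contiguous chunks (what an unshuffled
            # per-label KFold would do) and record the chunk index.
            chunks = np.array_split(label_positions, n_folds)
            for fold_idx, chunk in enumerate(chunks):
                test_folds[chunk] = fold_idx
        return test_folds


    if __name__ == '__main__':
        labels = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
        test_folds = stratified_test_folds(labels, n_folds=3)
        for fold_idx in range(3):
            test = np.where(test_folds == fold_idx)[0]
            train = np.where(test_folds != fold_idx)[0]
            print("%s %s" % (train, test))

Because the per-class chunks stay contiguous, the resulting cross-validation score still reflects
sample dependency in ordered datasets such as digits, which is exactly what the new
test_kfold_can_detect_dependent_samples_on_digits test above exercises.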