diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 57a50d325497e..764808b7fdc86 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -975,7 +975,8 @@ The following example shows how to fit the majority rule classifier:: >>> iris = datasets.load_iris() >>> X, y = iris.data[:, 1:3], iris.target - >>> clf1 = LogisticRegression(random_state=1) + >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', + ... random_state=1) >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1) >>> clf3 = GaussianNB() @@ -984,10 +985,10 @@ The following example shows how to fit the majority rule classifier:: >>> for clf, label in zip([clf1, clf2, clf3, eclf], ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']): ... scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy') ... print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label)) - Accuracy: 0.90 (+/- 0.05) [Logistic Regression] + Accuracy: 0.95 (+/- 0.04) [Logistic Regression] Accuracy: 0.94 (+/- 0.04) [Random Forest] Accuracy: 0.91 (+/- 0.04) [naive Bayes] - Accuracy: 0.95 (+/- 0.05) [Ensemble] + Accuracy: 0.95 (+/- 0.04) [Ensemble] Weighted Average Probabilities (Soft Voting) @@ -1060,7 +1061,8 @@ The `VotingClassifier` can also be used together with `GridSearch` in order to tune the hyperparameters of the individual estimators:: >>> from sklearn.model_selection import GridSearchCV - >>> clf1 = LogisticRegression(random_state=1) + >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', + ... random_state=1) >>> clf2 = RandomForestClassifier(random_state=1) >>> clf3 = GaussianNB() >>> eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft') diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 968cc663f9431..ab6b2994835f9 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -775,15 +775,20 @@ The "saga" solver [7]_ is a variant of "sag" that also supports the non-smooth `penalty="l1"` option. This is therefore the solver of choice for sparse multinomial logistic regression. -In a nutshell, one may choose the solver with the following rules: - -================================= ===================================== -Case Solver -================================= ===================================== -L1 penalty "liblinear" or "saga" -Multinomial loss "lbfgs", "sag", "saga" or "newton-cg" -Very Large dataset (`n_samples`) "sag" or "saga" -================================= ===================================== +In a nutshell, the following table summarizes the solvers characteristics: + +============================ =========== ======= =========== ===== ====== +solver 'liblinear' 'lbfgs' 'newton-cg' 'sag' 'saga' +============================ =========== ======= =========== ===== ====== +Multinomial + L2 penalty no yes yes yes yes +OVR + L2 penalty yes yes yes yes yes +Multinomial + L1 penalty no no no no yes +OVR + L1 penalty yes no no no yes +============================ =========== ======= =========== ===== ====== +Penalize the intercept (bad) yes no no no no +Faster for large datasets no no no yes yes +Robust to unscaled datasets yes yes yes no no +============================ =========== ======= =========== ===== ====== The "saga" solver is often the best choice. The "liblinear" solver is used by default for historical reasons. 
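(Illustrative aside, not part of the patch: a minimal sketch of how the solver/penalty table above maps onto estimator configurations. The synthetic dataset, the scaling step and the ``max_iter`` value are arbitrary choices for the example, assuming scikit-learn >= 0.19 so that the 'saga' solver is available.)::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler

    X, y = make_classification(n_samples=5000, n_features=20, n_classes=3,
                               n_informative=10, random_state=0)
    # 'sag' and 'saga' converge quickly only on roughly standardized features.
    X = StandardScaler().fit_transform(X)

    # 'saga' is the only solver that supports the multinomial loss together
    # with an L1 penalty, hence the recommendation for sparse multinomial
    # logistic regression.
    clf_l1 = LogisticRegression(solver='saga', penalty='l1',
                                multi_class='multinomial', max_iter=5000)
    clf_l1.fit(X, y)

    # 'liblinear' only fits one-vs-rest models (and penalizes the intercept),
    # but remains a reasonable choice for small, possibly unscaled datasets.
    clf_ovr = LogisticRegression(solver='liblinear', penalty='l2',
                                 multi_class='ovr')
    clf_ovr.fit(X, y)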
diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index e60751b7f688a..2c12f1038c285 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -374,12 +374,13 @@ function or **logistic** function: :: - >>> logistic = linear_model.LogisticRegression(C=1e5) - >>> logistic.fit(iris_X_train, iris_y_train) + >>> log = linear_model.LogisticRegression(solver='lbfgs', C=1e5, + ... multi_class='multinomial') + >>> log.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=100000.0, class_weight=None, dual=False, - fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=None, penalty='l2', random_state=None, - solver='liblinear', tol=0.0001, verbose=0, warm_start=False) + fit_intercept=True, intercept_scaling=1, max_iter=100, + multi_class='multinomial', n_jobs=None, penalty='l2', random_state=None, + solver='lbfgs', tol=0.0001, verbose=0, warm_start=False) This is known as :class:`LogisticRegression`. diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index df346696734a0..627956342a70f 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -161,6 +161,7 @@ Support for Python 3.3 has been officially dropped. by `Andreas Müller`_ and :user:`Guillaume Lemaitre `. + :mod:`sklearn.covariance` ......................... @@ -504,6 +505,12 @@ Support for Python 3.3 has been officially dropped. ValueError. :issue:`11327` by :user:`Karan Dhingra ` and `Joel Nothman`_. +- |API| The default values of the ``solver`` and ``multi_class`` parameters of + :class:`linear_model.LogisticRegression` will change respectively from + ``'liblinear'`` and ``'ovr'`` in version 0.20 to ``'lbfgs'`` and + ``'auto'`` in version 0.22. A FutureWarning is raised when the default + values are used. :issue:`11905` by `Tom Dupre la Tour`_ and `Joel Nothman`_. + - |API| Deprecate ``positive=True`` option in :class:`linear_model.Lars` as the underlying implementation is broken. Use :class:`linear_model.Lasso` instead. :issue:`9837` by `Alexandre Gramfort`_. diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 505ec2f17b248..608df3dc43bce 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -293,6 +293,8 @@ def test_bootstrap_features(): assert_greater(boston.data.shape[1], np.unique(features).shape[0]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_probability(): # Predict probabilities. 
rng = check_random_state(0) @@ -712,6 +714,8 @@ def test_oob_score_consistency(): assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_estimators_samples(): # Check that format of estimators_samples_ is correct and that results # generated at fit time can be identically reproduced at a later time @@ -748,6 +752,8 @@ def test_estimators_samples(): assert_array_almost_equal(orig_coefs, new_coefs) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_estimators_samples_deterministic(): # This test is a regression test to check that with a random step # (e.g. SparseRandomProjection) and a given random state, the results diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index f5bfdbd101beb..c480d8381f651 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -28,6 +28,8 @@ X, y = iris.data[:, 1:3], iris.target +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_estimator_init(): eclf = VotingClassifier(estimators=[]) msg = ('Invalid `estimators` attribute, `estimators` should be' @@ -59,6 +61,8 @@ def test_estimator_init(): assert_raise_message(ValueError, msg, eclf.fit, X, y) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_predictproba_hardvoting(): eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), ('lr2', LogisticRegression())], @@ -67,6 +71,8 @@ def test_predictproba_hardvoting(): assert_raise_message(AttributeError, msg, eclf.predict_proba, X) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_notfitted(): eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), ('lr2', LogisticRegression())], @@ -76,6 +82,8 @@ def test_notfitted(): assert_raise_message(NotFittedError, msg, eclf.predict_proba, X) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_majority_label_iris(): """Check classification by majority label on dataset iris.""" @@ -92,7 +100,8 @@ def test_majority_label_iris(): @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_tie_situation(): """Check voting classifier selects smaller class label in tie situation.""" - clf1 = LogisticRegression(random_state=123) + clf1 = LogisticRegression(random_state=123, multi_class='ovr', + solver='liblinear') clf2 = RandomForestClassifier(random_state=123) eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)], voting='hard') @@ -101,6 +110,8 @@ def test_tie_situation(): assert_equal(eclf.fit(X, y).predict(X)[73], 1) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_weights_iris(): """Check classification 
by average probabilities on dataset iris.""" @@ -115,6 +126,8 @@ def test_weights_iris(): assert_almost_equal(scores.mean(), 0.93, decimal=2) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_predict_on_toy_problem(): """Manually check predicted class labels for toy dataset.""" @@ -148,6 +161,8 @@ def test_predict_on_toy_problem(): assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_predict_proba_on_toy_problem(): """Calculate predicted probabilities on toy dataset.""" @@ -216,6 +231,8 @@ def test_multilabel(): return +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_gridsearch(): """Check GridSearch support.""" @@ -234,6 +251,8 @@ def test_gridsearch(): grid.fit(iris.data, iris.target) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" @@ -256,6 +275,8 @@ def test_parallel_fit(): assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_sample_weight(): """Tests sample_weight parameter of VotingClassifier""" @@ -300,6 +321,8 @@ def fit(self, X, y, *args, **sample_weight): eclf.fit(X, y, sample_weight=np.ones((len(y),))) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_set_params(): """set_params should be able to set estimators""" @@ -335,6 +358,8 @@ def test_set_params(): eclf1.get_params()["lr"].get_params()['C']) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_set_estimator_none(): """VotingClassifier set_params should be able to set estimators as None""" @@ -390,6 +415,8 @@ def test_set_estimator_none(): assert_array_equal(eclf2.transform(X1), np.array([[0], [1]])) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_estimator_weights_format(): # Test estimator weights inputs as list and array @@ -408,6 +435,8 @@ def test_estimator_weights_format(): assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class 
will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_transform(): """Check transform method of VotingClassifier on toy dataset.""" diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index e585947f11aad..2119d49ef4ccb 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -90,7 +90,8 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): >>> from sklearn.linear_model import LogisticRegression >>> from sklearn.naive_bayes import GaussianNB >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier - >>> clf1 = LogisticRegression(random_state=1) + >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', + ... random_state=1) >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1) >>> clf3 = GaussianNB() >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index e6bb76c5e19a9..47e62eb8e7168 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -8,7 +8,6 @@ from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_raises from sklearn.utils.testing import skip_if_32bit @@ -178,6 +177,8 @@ def test_feature_importances(): assert_array_almost_equal(X_new, X[:, feature_mask]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_sample_weight(): # Ensure sample weights are passed to underlying estimator X, y = datasets.make_classification( @@ -214,6 +215,8 @@ def test_coef_default_threshold(): assert_array_almost_equal(X_new, X[:, mask]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @skip_if_32bit def test_2d_coef(): X, y = datasets.make_classification( diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 7494b650dc9bc..44dda94b80e57 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,35 +424,61 @@ def hessp(v): return grad, hessp -def _check_solver_option(solver, multi_class, penalty, dual): - if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: - raise ValueError("Logistic Regression supports only liblinear, " - "newton-cg, lbfgs, sag and saga solvers, got %s" - % solver) - - if multi_class not in ['multinomial', 'ovr']: - raise ValueError("multi_class should be either multinomial or " - "ovr, got %s" % multi_class) - +def _check_solver(solver, penalty, dual): + if solver == 'warn': + solver = 'liblinear' + warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. " + "Specify a solver to silence this warning.", + FutureWarning) + + all_solvers = ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'] + if solver not in all_solvers: + raise ValueError("Logistic Regression supports only solvers in %s, got" + " %s." 
% (all_solvers, solver)) + + all_penalties = ['l1', 'l2'] + if penalty not in all_penalties: + raise ValueError("Logistic Regression supports only penalties in %s," + " got %s." % (all_penalties, penalty)) + + if solver not in ['liblinear', 'saga'] and penalty != 'l2': + raise ValueError("Solver %s supports only l2 penalties, " + "got %s penalty." % (solver, penalty)) + if solver != 'liblinear' and dual: + raise ValueError("Solver %s supports only " + "dual=False, got dual=%s" % (solver, dual)) + return solver + + +def _check_multi_class(multi_class, solver, n_classes): + if multi_class == 'warn': + multi_class = 'ovr' + if n_classes > 2: + warnings.warn("Default multi_class will be changed to 'auto' in" + " 0.22. Specify the multi_class option to silence " + "this warning.", FutureWarning) + if multi_class == 'auto': + if solver == 'liblinear': + multi_class = 'ovr' + elif n_classes > 2: + multi_class = 'multinomial' + else: + multi_class = 'ovr' + if multi_class not in ('multinomial', 'ovr'): + raise ValueError("multi_class should be 'multinomial', 'ovr' or " + "'auto'. Got %s." % multi_class) if multi_class == 'multinomial' and solver == 'liblinear': raise ValueError("Solver %s does not support " "a multinomial backend." % solver) + return multi_class - if solver not in ['liblinear', 'saga']: - if penalty != 'l2': - raise ValueError("Solver %s supports only l2 penalties, " - "got %s penalty." % (solver, penalty)) - if solver != 'liblinear': - if dual: - raise ValueError("Solver %s supports only " - "dual=False, got dual=%s" % (solver, dual)) def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, max_iter=100, tol=1e-4, verbose=0, solver='lbfgs', coef=None, class_weight=None, dual=False, penalty='l2', - intercept_scaling=1., multi_class='ovr', + intercept_scaling=1., multi_class='warn', random_state=None, check_input=True, max_squared_sum=None, sample_weight=None): """Compute a Logistic Regression model for a list of regularization @@ -471,7 +497,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, X : array-like or sparse matrix, shape (n_samples, n_features) Input data. - y : array-like, shape (n_samples,) + y : array-like, shape (n_samples,) or (n_samples, n_targets) Input data, target values. pos_class : int, None @@ -540,12 +566,18 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased. - multi_class : str, {'ovr', 'multinomial'} - Multiclass option can be either 'ovr' or 'multinomial'. If the option - chosen is 'ovr', then a binary problem is fit for each label. Else - the loss minimised is the multinomial loss fit across - the entire probability distribution. Does not work for 'liblinear' - solver. + multi_class : str, {'ovr', 'multinomial', 'auto'}, default: 'ovr' + If the option chosen is 'ovr', then a binary problem is fit for each + label. For 'multinomial' the loss minimised is the multinomial loss fit + across the entire probability distribution, *even when the data is + binary*. 'multinomial' is unavailable when solver='liblinear'. + 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', + and otherwise selects 'multinomial'. + + .. versionadded:: 0.18 + Stochastic Average Gradient descent solver for 'multinomial' case. + .. versionchanged:: 0.20 + Default will change from 'ovr' to 'auto' in 0.22. 
random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator to use when shuffling @@ -593,7 +625,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) - _check_solver_option(solver, multi_class, penalty, dual) + solver = _check_solver(solver, penalty, dual) # Preprocessing. if check_input: @@ -602,9 +634,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, y = check_array(y, ensure_2d=False, dtype=None) check_consistent_length(X, y) _, n_features = X.shape + classes = np.unique(y) random_state = check_random_state(random_state) + multi_class = _check_multi_class(multi_class, solver, len(classes)) if pos_class is None and multi_class != 'multinomial': if (classes.size > 2): raise ValueError('To fit OvR, use the pos_class argument') @@ -761,8 +795,9 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, "'newton-cg', 'sag'}, got '%s' instead" % solver) if multi_class == 'multinomial': - multi_w0 = np.reshape(w0, (classes.size, -1)) - if classes.size == 2: + n_classes = max(2, classes.size) + multi_w0 = np.reshape(w0, (n_classes, -1)) + if n_classes == 2: multi_w0 = multi_w0[1][np.newaxis, :] coefs.append(multi_w0.copy()) else: @@ -779,7 +814,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, max_iter=100, tol=1e-4, class_weight=None, verbose=0, solver='lbfgs', penalty='l2', dual=False, intercept_scaling=1., - multi_class='ovr', random_state=None, + multi_class='warn', random_state=None, max_squared_sum=None, sample_weight=None): """Computes scores across logistic_regression_path @@ -864,11 +899,10 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, (and therefore on the intercept) intercept_scaling has to be increased. multi_class : str, {'ovr', 'multinomial'} - Multiclass option can be either 'ovr' or 'multinomial'. If the option - chosen is 'ovr', then a binary problem is fit for each label. Else - the loss minimised is the multinomial loss fit across - the entire probability distribution. Does not work for 'liblinear' - solver. + If the option chosen is 'ovr', then a binary problem is fit for each + label. For 'multinomial' the loss minimised is the multinomial loss fit + across the entire probability distribution, *even when the data is + binary*. 'multinomial' is unavailable when solver='liblinear'. random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator to use when shuffling @@ -903,8 +937,6 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, n_iter : array, shape(n_cs,) Actual number of iteration for each Cs. """ - _check_solver_option(solver, multi_class, penalty, dual) - X_train = X[train] X_test = X[test] y_train = y[train] @@ -925,7 +957,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, check_input=False, max_squared_sum=max_squared_sum, sample_weight=sample_weight) - log_reg = LogisticRegression(multi_class=multi_class) + log_reg = LogisticRegression(solver=solver, multi_class=multi_class) # The score method of Logistic Regression has a classes_ attribute. if multi_class == 'ovr': @@ -1046,7 +1078,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, 'liblinear'. solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \ - default: 'liblinear' + default: 'liblinear'. 
Algorithm to use in the optimization problem. @@ -1066,20 +1098,25 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. + .. versionchanged:: 0.20 + Default will change from 'liblinear' to 'lbfgs' in 0.22. max_iter : int, default: 100 Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge. - multi_class : str, {'ovr', 'multinomial'}, default: 'ovr' - Multiclass option can be either 'ovr' or 'multinomial'. If the option - chosen is 'ovr', then a binary problem is fit for each label. Else - the loss minimised is the multinomial loss fit across - the entire probability distribution. Does not work for 'liblinear' - solver. + multi_class : str, {'ovr', 'multinomial', 'auto'}, default: 'ovr' + If the option chosen is 'ovr', then a binary problem is fit for each + label. For 'multinomial' the loss minimised is the multinomial loss fit + across the entire probability distribution, *even when the data is + binary*. 'multinomial' is unavailable when solver='liblinear'. + 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', + and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. + .. versionchanged:: 0.20 + Default will change from 'ovr' to 'auto' in 0.22. verbose : int, default: 0 For the liblinear and lbfgs solvers set verbose to any positive @@ -1133,20 +1170,20 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, >>> from sklearn.datasets import load_iris >>> from sklearn.linear_model import LogisticRegression >>> X, y = load_iris(return_X_y=True) - >>> clf = LogisticRegression(random_state=0).fit(X, y) + >>> clf = LogisticRegression(random_state=0, solver='lbfgs', + ... multi_class='multinomial').fit(X, y) >>> clf.predict(X[:2, :]) array([0, 0]) >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS - array([[8.78...e-01, 1.21...e-01, 1.079...e-05], - [7.97...e-01, 2.02...e-01, 3.029...e-05]]) + array([[9.8...e-01, 1.8...e-02, 1.4...e-08], + [9.7...e-01, 2.8...e-02, ...e-08]]) >>> clf.score(X, y) - 0.96 + 0.97... See also -------- SGDClassifier : incrementally trained logistic regression (when given the parameter ``loss="log"``). - sklearn.svm.LinearSVC : learns SVM models using the same algorithm. LogisticRegressionCV : Logistic regression with built-in cross validation Notes @@ -1183,8 +1220,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, - random_state=None, solver='liblinear', max_iter=100, - multi_class='ovr', verbose=0, warm_start=False, n_jobs=None): + random_state=None, solver='warn', max_iter=100, + multi_class='warn', verbose=0, warm_start=False, n_jobs=None): self.penalty = penalty self.dual = dual @@ -1210,7 +1247,7 @@ def fit(self, X, y, sample_weight=None): Training vector, where n_samples is the number of samples and n_features is the number of features. - y : array-like, shape (n_samples,) + y : array-like, shape (n_samples,) or (n_samples, n_targets) Target vector relative to X. 
sample_weight : array-like, shape (n_samples,) optional @@ -1234,21 +1271,23 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - if self.solver in ['newton-cg']: + solver = _check_solver(self.solver, self.penalty, self.dual) + + if solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C", - accept_large_sparse=self.solver != 'liblinear') + accept_large_sparse=solver != 'liblinear') check_classification_targets(y) self.classes_ = np.unique(y) n_samples, n_features = X.shape - _check_solver_option(self.solver, self.multi_class, self.penalty, - self.dual) + multi_class = _check_multi_class(self.multi_class, solver, + len(self.classes_)) - if self.solver == 'liblinear': + if solver == 'liblinear': if effective_n_jobs(self.n_jobs) != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. Got 'n_jobs'" @@ -1261,7 +1300,7 @@ def fit(self, X, y, sample_weight=None): self.n_iter_ = np.array([n_iter_]) return self - if self.solver in ['sag', 'saga']: + if solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1290,7 +1329,7 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # Hack so that we iterate only once for the multinomial case. - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': classes_ = [None] warm_start_coef = [warm_start_coef] if warm_start_coef is None: @@ -1300,7 +1339,7 @@ def fit(self, X, y, sample_weight=None): # The SAG solver releases the GIL so it's more efficient to use # threads for this solver. - if self.solver in ['sag', 'saga']: + if solver in ['sag', 'saga']: prefer = 'threads' else: prefer = 'processes' @@ -1308,8 +1347,8 @@ def fit(self, X, y, sample_weight=None): prefer=prefer)( path_func(X, y, pos_class=class_, Cs=[self.C], fit_intercept=self.fit_intercept, tol=self.tol, - verbose=self.verbose, solver=self.solver, - multi_class=self.multi_class, max_iter=self.max_iter, + verbose=self.verbose, solver=solver, + multi_class=multi_class, max_iter=self.max_iter, class_weight=self.class_weight, check_input=False, random_state=self.random_state, coef=warm_start_coef_, penalty=self.penalty, @@ -1320,7 +1359,7 @@ def fit(self, X, y, sample_weight=None): fold_coefs_, _, n_iter_ = zip(*fold_coefs_) self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0] - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': self.coef_ = fold_coefs_[0][0] else: self.coef_ = np.asarray(fold_coefs_) @@ -1358,7 +1397,11 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - if self.multi_class == "ovr": + + ovr = (self.multi_class in ["ovr", "warn"] or + (self.multi_class == 'auto' and (self.classes_.size <= 2 or + self.solver == 'liblinear'))) + if ovr: return super(LogisticRegression, self)._predict_proba_lr(X) else: decision = self.decision_function(X) @@ -1452,7 +1495,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, default scoring option used is 'accuracy'. solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \ - default: 'lbfgs' + default: 'lbfgs'. Algorithm to use in the optimization problem. 
@@ -1523,15 +1566,18 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased. - multi_class : str, {'ovr', 'multinomial'} - Multiclass option can be either 'ovr' or 'multinomial'. If the option - chosen is 'ovr', then a binary problem is fit for each label. Else - the loss minimised is the multinomial loss fit across - the entire probability distribution. Does not work for 'liblinear' - solver. + multi_class : str, {'ovr', 'multinomial', 'auto'}, default: 'ovr' + If the option chosen is 'ovr', then a binary problem is fit for each + label. For 'multinomial' the loss minimised is the multinomial loss fit + across the entire probability distribution, *even when the data is + binary*. 'multinomial' is unavailable when solver='liblinear'. + 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', + and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. + .. versionchanged:: 0.20 + Default will change from 'ovr' to 'auto' in 0.22. random_state : int, RandomState instance or None, optional, default None If int, random_state is the seed used by the random number generator; @@ -1591,25 +1637,24 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, >>> from sklearn.datasets import load_iris >>> from sklearn.linear_model import LogisticRegressionCV >>> X, y = load_iris(return_X_y=True) - >>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y) + >>> clf = LogisticRegressionCV(cv=5, random_state=0, + ... multi_class='multinomial').fit(X, y) >>> clf.predict(X[:2, :]) array([0, 0]) - >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS - array([[8.72...e-01, 1.27...e-01, 5.50...e-14], - [6.76...e-01, 3.23...e-01, 2.11...e-13]]) + >>> clf.predict_proba(X[:2, :]).shape + (2, 3) >>> clf.score(X, y) # doctest: +ELLIPSIS - 0.9266... + 0.98... See also -------- LogisticRegression """ - def __init__(self, Cs=10, fit_intercept=True, cv='warn', dual=False, penalty='l2', scoring=None, solver='lbfgs', tol=1e-4, max_iter=100, class_weight=None, n_jobs=None, verbose=0, - refit=True, intercept_scaling=1., multi_class='ovr', + refit=True, intercept_scaling=1., multi_class='warn', random_state=None): self.Cs = Cs self.fit_intercept = fit_intercept @@ -1637,7 +1682,7 @@ def fit(self, X, y, sample_weight=None): Training vector, where n_samples is the number of samples and n_features is the number of features. - y : array-like, shape (n_samples,) + y : array-like, shape (n_samples,) or (n_samples, n_targets) Target vector relative to X. 
sample_weight : array-like, shape (n_samples,) optional @@ -1648,8 +1693,7 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - _check_solver_option(self.solver, self.multi_class, self.penalty, - self.dual) + solver = _check_solver(self.solver, self.penalty, self.dual) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" @@ -1660,7 +1704,7 @@ def fit(self, X, y, sample_weight=None): X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C", - accept_large_sparse=self.solver != 'liblinear') + accept_large_sparse=solver != 'liblinear') check_classification_targets(y) class_weight = self.class_weight @@ -1676,7 +1720,10 @@ def fit(self, X, y, sample_weight=None): classes = self.classes_ = label_encoder.classes_ encoded_labels = label_encoder.transform(label_encoder.classes_) - if self.solver in ['sag', 'saga']: + multi_class = _check_multi_class(self.multi_class, solver, + len(classes)) + + if solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1702,7 +1749,7 @@ def fit(self, X, y, sample_weight=None): # We need this hack to iterate only once over labels, in the case of # multi_class = multinomial, without changing the value of the labels. - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': iter_encoded_labels = iter_classes = [None] else: iter_encoded_labels = encoded_labels @@ -1727,10 +1774,10 @@ def fit(self, X, y, sample_weight=None): prefer=prefer)( path_func(X, y, train, test, pos_class=label, Cs=self.Cs, fit_intercept=self.fit_intercept, penalty=self.penalty, - dual=self.dual, solver=self.solver, tol=self.tol, + dual=self.dual, solver=solver, tol=self.tol, max_iter=self.max_iter, verbose=self.verbose, class_weight=class_weight, scoring=self.scoring, - multi_class=self.multi_class, + multi_class=multi_class, intercept_scaling=self.intercept_scaling, random_state=self.random_state, max_squared_sum=max_squared_sum, @@ -1739,7 +1786,7 @@ def fit(self, X, y, sample_weight=None): for label in iter_encoded_labels for train, test in folds) - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': multi_coefs_paths, Cs, multi_scores, n_iter_ = zip(*fold_coefs_) multi_coefs_paths = np.asarray(multi_coefs_paths) multi_scores = np.asarray(multi_scores) @@ -1776,14 +1823,14 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # hack to iterate only once for multinomial case. 
- if self.multi_class == 'multinomial': + if multi_class == 'multinomial': scores = multi_scores coefs_paths = multi_coefs_paths for index, (cls, encoded_label) in enumerate( zip(iter_classes, iter_encoded_labels)): - if self.multi_class == 'ovr': + if multi_class == 'ovr': # The scores_ / coefs_paths_ dict have unencoded class # labels as their keys scores = self.scores_[cls] @@ -1794,7 +1841,7 @@ def fit(self, X, y, sample_weight=None): C_ = self.Cs_[best_index] self.C_.append(C_) - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': coef_init = np.mean(coefs_paths[:, best_index, :, :], axis=0) else: @@ -1803,12 +1850,12 @@ def fit(self, X, y, sample_weight=None): # Note that y is label encoded and hence pos_class must be # the encoded label / None (for 'multinomial') w, _, _ = logistic_regression_path( - X, y, pos_class=encoded_label, Cs=[C_], solver=self.solver, + X, y, pos_class=encoded_label, Cs=[C_], solver=solver, fit_intercept=self.fit_intercept, coef=coef_init, max_iter=self.max_iter, tol=self.tol, penalty=self.penalty, class_weight=class_weight, - multi_class=self.multi_class, + multi_class=multi_class, verbose=max(0, self.verbose - 1), random_state=self.random_state, check_input=False, max_squared_sum=max_squared_sum, @@ -1823,7 +1870,7 @@ def fit(self, X, y, sample_weight=None): for i in range(len(folds))], axis=0) self.C_.append(np.mean(self.Cs_[best_indices])) - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': self.C_ = np.tile(self.C_, n_classes) self.coef_ = w[:, :X.shape[1]] if self.fit_intercept: diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index 0a2830518505a..db717a3f9974a 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -380,7 +380,8 @@ def _randomized_logistic(X, y, weights, mask, C=1., verbose=False, for this_C, this_scores in zip(C, scores.T): # XXX : would be great to do it with a warm_start ... clf = LogisticRegression(C=this_C, tol=tol, penalty='l1', dual=False, - fit_intercept=fit_intercept) + fit_intercept=fit_intercept, + solver='liblinear', multi_class='ovr') clf.fit(X, y) this_scores[:] = np.any( np.abs(clf.coef_) > 10 * np.finfo(np.float).eps, axis=0) diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 06a72d47b4c9c..3e8861f26de83 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -212,13 +212,15 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) >>> y = np.array([1, 1, 2, 2]) - >>> clf = linear_model.LogisticRegression(solver='sag') + >>> clf = linear_model.LogisticRegression( + ... solver='sag', multi_class='multinomial') >>> clf.fit(X, y) ... 
#doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=None, penalty='l2', random_state=None, - solver='sag', tol=0.0001, verbose=0, warm_start=False) + multi_class='multinomial', n_jobs=None, penalty='l2', + random_state=None, solver='sag', tol=0.0001, verbose=0, + warm_start=False) References ---------- diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 9fc6d6ec5f048..d6be7e2a16c16 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -4,6 +4,7 @@ import pytest +from sklearn.base import clone from sklearn.datasets import load_iris, make_classification from sklearn.metrics import log_loss from sklearn.metrics.scorer import get_scorer @@ -22,6 +23,7 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_no_warnings from sklearn.exceptions import ConvergenceWarning from sklearn.exceptions import ChangedBehaviorWarning @@ -57,6 +59,8 @@ def check_predictions(clf, X, y): assert_array_equal(probabilities.argmax(axis=1), y) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_predict_2_classes(): # Simple sanity check on a 2 classes dataset # Make sure it predicts the correct result on simple datasets. @@ -72,6 +76,7 @@ def test_predict_2_classes(): random_state=0), X_sp, Y1) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_error(): # Test for appropriate exception on errors msg = "Penalty term must be positive" @@ -95,6 +100,7 @@ def test_error(): assert_raise_message(ValueError, msg, LR(max_iter="test").fit, X, Y1) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_logistic_cv_mock_scorer(): class MockScorer(object): @@ -130,7 +136,7 @@ def __call__(self, model, X, y, sample_weight=None): def test_logistic_cv_score_does_not_warn_by_default(): - lr = LogisticRegressionCV(cv=2) + lr = LogisticRegressionCV(cv=2, multi_class='ovr') lr.fit(X, Y1) with pytest.warns(None) as record: @@ -142,7 +148,7 @@ def test_lr_liblinear_warning(): n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] - lr = LogisticRegression(solver='liblinear', n_jobs=2) + lr = LogisticRegression(solver='liblinear', multi_class='ovr', n_jobs=2) assert_warns_message(UserWarning, "'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. Got 'n_jobs'" @@ -150,6 +156,8 @@ def test_lr_liblinear_warning(): lr.fit, iris.data, target) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_predict_3_classes(): check_predictions(LogisticRegression(C=10), X, Y2) check_predictions(LogisticRegression(C=10), X_sp, Y2) @@ -164,7 +172,8 @@ def test_predict_iris(): # Test that both multinomial and OvR solvers handle # multiclass data correctly and give good accuracy # score (>0.95) for the training data. 
- for clf in [LogisticRegression(C=len(iris.data)), + for clf in [LogisticRegression(C=len(iris.data), solver='liblinear', + multi_class='ovr'), LogisticRegression(C=len(iris.data), solver='lbfgs', multi_class='multinomial'), LogisticRegression(C=len(iris.data), solver='newton-cg', @@ -198,12 +207,13 @@ def test_multinomial_validation(solver): def test_check_solver_option(LR): X, y = iris.data, iris.target - msg = ('Logistic Regression supports only liblinear, newton-cg, ' - 'lbfgs, sag and saga solvers, got wrong_name') - lr = LR(solver="wrong_name") + msg = ("Logistic Regression supports only solvers in ['liblinear', " + "'newton-cg', 'lbfgs', 'sag', 'saga'], got wrong_name.") + lr = LR(solver="wrong_name", multi_class="ovr") assert_raise_message(ValueError, msg, lr.fit, X, y) - msg = "multi_class should be either multinomial or ovr, got wrong_name" + msg = ("multi_class should be 'multinomial', 'ovr' or 'auto'. " + "Got wrong_name") lr = LR(solver='newton-cg', multi_class="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) @@ -216,15 +226,40 @@ def test_check_solver_option(LR): for solver in ['newton-cg', 'lbfgs', 'sag']: msg = ("Solver %s supports only l2 penalties, got l1 penalty." % solver) - lr = LR(solver=solver, penalty='l1') + lr = LR(solver=solver, penalty='l1', multi_class='ovr') assert_raise_message(ValueError, msg, lr.fit, X, y) for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']: msg = ("Solver %s supports only dual=False, got dual=True" % solver) - lr = LR(solver=solver, dual=True) + lr = LR(solver=solver, dual=True, multi_class='ovr') assert_raise_message(ValueError, msg, lr.fit, X, y) +@pytest.mark.parametrize('model, params, warn_solver', + [(LogisticRegression, {}, True), + (LogisticRegressionCV, {'cv': 5}, False)]) +def test_logistic_regression_warnings(model, params, warn_solver): + clf_solver_warning = model(multi_class='ovr', **params) + clf_multi_class_warning = model(solver='lbfgs', **params) + clf_no_warnings = model(solver='lbfgs', multi_class='ovr', **params) + + solver_warning_msg = "Default solver will be changed to 'lbfgs'" + multi_class_warning_msg = "Default multi_class will be changed to 'auto" + + if warn_solver: + assert_warns_message(FutureWarning, solver_warning_msg, + clf_solver_warning.fit, iris.data, iris.target) + else: + assert_no_warnings(clf_no_warnings.fit, iris.data, iris.target) + + assert_warns_message(FutureWarning, multi_class_warning_msg, + clf_multi_class_warning.fit, iris.data, iris.target) + # But no warning when binary target: + assert_no_warnings(clf_multi_class_warning.fit, + iris.data, iris.target == 0) + assert_no_warnings(clf_no_warnings.fit, iris.data, iris.target) + + @pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) def test_multinomial_binary(solver): # Test multinomial LR on a binary problem. @@ -259,11 +294,13 @@ def test_multinomial_binary_probabilities(): expected_proba_class_1 = (np.exp(decision) / (np.exp(decision) + np.exp(-decision))) - expected_proba = np.c_[1-expected_proba_class_1, expected_proba_class_1] + expected_proba = np.c_[1 - expected_proba_class_1, expected_proba_class_1] assert_almost_equal(proba, expected_proba) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_sparsify(): # Test sparsify and densify members. 
n_samples, n_features = iris.data.shape @@ -287,6 +324,8 @@ def test_sparsify(): assert_array_almost_equal(pred_d_d, pred_d_s) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_inconsistent_input(): # Test that an exception is raised on inconsistent input rng = np.random.RandomState(0) @@ -305,6 +344,8 @@ def test_inconsistent_input(): rng.random_sample((3, 12))) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_write_parameters(): # Test that we can write to coef_ and intercept_ clf = LogisticRegression(random_state=0) @@ -314,6 +355,8 @@ def test_write_parameters(): assert_array_almost_equal(clf.decision_function(X), 0) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_nan(): # Test proper NaN handling. # Regression test for Issue #252: fit used to go into an infinite loop. @@ -336,12 +379,11 @@ def test_consistency_path(): for solver in ['sag', 'saga']: coefs, Cs, _ = f(logistic_regression_path)( X, y, Cs=Cs, fit_intercept=False, tol=1e-5, solver=solver, - max_iter=1000, - random_state=0) + max_iter=1000, multi_class='ovr', random_state=0) for i, C in enumerate(Cs): lr = LogisticRegression(C=C, fit_intercept=False, tol=1e-5, - solver=solver, - random_state=0) + solver=solver, multi_class='ovr', + random_state=0, max_iter=1000) lr.fit(X, y) lr_coef = lr.coef_.ravel() assert_array_almost_equal(lr_coef, coefs[i], decimal=4, @@ -352,9 +394,10 @@ def test_consistency_path(): Cs = [1e3] coefs, Cs, _ = f(logistic_regression_path)( X, y, Cs=Cs, fit_intercept=True, tol=1e-6, solver=solver, - intercept_scaling=10000., random_state=0) + intercept_scaling=10000., random_state=0, multi_class='ovr') lr = LogisticRegression(C=Cs[0], fit_intercept=True, tol=1e-4, - intercept_scaling=10000., random_state=0) + intercept_scaling=10000., random_state=0, + multi_class='ovr', solver=solver) lr.fit(X, y) lr_coef = np.concatenate([lr.coef_.ravel(), lr.intercept_]) assert_array_almost_equal(lr_coef, coefs[0], decimal=4, @@ -373,11 +416,14 @@ def test_logistic_regression_path_convergence_fail(): def test_liblinear_dual_random_state(): # random_state is relevant for liblinear solver only if dual=True X, y = make_classification(n_samples=20, random_state=0) - lr1 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15) + lr1 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15, + solver='liblinear', multi_class='ovr') lr1.fit(X, y) - lr2 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15) + lr2 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15, + solver='liblinear', multi_class='ovr') lr2.fit(X, y) - lr3 = LogisticRegression(random_state=8, dual=True, max_iter=1, tol=1e-15) + lr3 = LogisticRegression(random_state=8, dual=True, max_iter=1, tol=1e-15, + solver='liblinear', multi_class='ovr') lr3.fit(X, y) # same result for same random state @@ -477,9 +523,10 @@ def test_logistic_cv(): X_ref -= X_ref.mean() X_ref /= X_ref.std() lr_cv = LogisticRegressionCV(Cs=[1.], fit_intercept=False, - solver='liblinear') + solver='liblinear', multi_class='ovr') lr_cv.fit(X_ref, y) - lr = LogisticRegression(C=1., fit_intercept=False) + lr = LogisticRegression(C=1., fit_intercept=False, + solver='liblinear', multi_class='ovr') lr.fit(X_ref, y) 
assert_array_almost_equal(lr.coef_, lr_cv.coef_) @@ -568,6 +615,7 @@ def test_multinomial_logistic_regression_string_inputs(): assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz']) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_logistic_cv_sparse(): X, y = make_classification(n_samples=50, n_features=5, @@ -630,11 +678,11 @@ def test_ovr_multinomial_iris(): precomputed_folds = list(cv.split(train, target)) # Train clf on the original dataset where classes 0 and 1 are separated - clf = LogisticRegressionCV(cv=precomputed_folds) + clf = LogisticRegressionCV(cv=precomputed_folds, multi_class='ovr') clf.fit(train, target) # Conflate classes 0 and 1 and train clf1 on this modified dataset - clf1 = LogisticRegressionCV(cv=precomputed_folds) + clf1 = LogisticRegressionCV(cv=precomputed_folds, multi_class='ovr') target_copy = target.copy() target_copy[target_copy == 0] = 1 clf1.fit(train, target_copy) @@ -680,13 +728,12 @@ def test_ovr_multinomial_iris(): def test_logistic_regression_solvers(): X, y = make_classification(n_features=10, n_informative=5, random_state=0) - ncg = LogisticRegression(solver='newton-cg', fit_intercept=False) - lbf = LogisticRegression(solver='lbfgs', fit_intercept=False) - lib = LogisticRegression(fit_intercept=False) - sag = LogisticRegression(solver='sag', fit_intercept=False, - random_state=42) - saga = LogisticRegression(solver='saga', fit_intercept=False, - random_state=42) + params = dict(fit_intercept=False, random_state=42, multi_class='ovr') + ncg = LogisticRegression(solver='newton-cg', **params) + lbf = LogisticRegression(solver='lbfgs', **params) + lib = LogisticRegression(solver='liblinear', **params) + sag = LogisticRegression(solver='sag', **params) + saga = LogisticRegression(solver='saga', **params) ncg.fit(X, y) lbf.fit(X, y) sag.fit(X, y) @@ -708,13 +755,13 @@ def test_logistic_regression_solvers_multiclass(): X, y = make_classification(n_samples=20, n_features=20, n_informative=10, n_classes=3, random_state=0) tol = 1e-7 - ncg = LogisticRegression(solver='newton-cg', fit_intercept=False, tol=tol) - lbf = LogisticRegression(solver='lbfgs', fit_intercept=False, tol=tol) - lib = LogisticRegression(fit_intercept=False, tol=tol) - sag = LogisticRegression(solver='sag', fit_intercept=False, tol=tol, - max_iter=1000, random_state=42) - saga = LogisticRegression(solver='saga', fit_intercept=False, tol=tol, - max_iter=10000, random_state=42) + params = dict(fit_intercept=False, tol=tol, random_state=42, + multi_class='ovr') + ncg = LogisticRegression(solver='newton-cg', **params) + lbf = LogisticRegression(solver='lbfgs', **params) + lib = LogisticRegression(solver='liblinear', **params) + sag = LogisticRegression(solver='sag', max_iter=1000, **params) + saga = LogisticRegression(solver='saga', max_iter=10000, **params) ncg.fit(X, y) lbf.fit(X, y) sag.fit(X, y) @@ -744,20 +791,25 @@ def test_logistic_regressioncv_class_weights(): clf_lbf = LogisticRegressionCV(solver='lbfgs', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight) clf_ncg = LogisticRegressionCV(solver='newton-cg', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight) clf_lib = LogisticRegressionCV(solver='liblinear', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight) clf_sag = LogisticRegressionCV(solver='sag', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight, tol=1e-5, 
max_iter=10000, random_state=0) clf_saga = LogisticRegressionCV(solver='saga', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight, tol=1e-5, max_iter=10000, random_state=0) @@ -784,27 +836,29 @@ def test_logistic_regression_sample_weights(): # not passing them at all (default None) for solver in ['lbfgs', 'liblinear']: clf_sw_none = LR(solver=solver, fit_intercept=False, - random_state=42) + random_state=42, multi_class='ovr') clf_sw_none.fit(X, y) clf_sw_ones = LR(solver=solver, fit_intercept=False, - random_state=42) + random_state=42, multi_class='ovr') clf_sw_ones.fit(X, y, sample_weight=np.ones(y.shape[0])) assert_array_almost_equal( clf_sw_none.coef_, clf_sw_ones.coef_, decimal=4) # Test that sample weights work the same with the lbfgs, # newton-cg, and 'sag' solvers - clf_sw_lbfgs = LR(solver='lbfgs', fit_intercept=False, random_state=42) + clf_sw_lbfgs = LR(solver='lbfgs', fit_intercept=False, random_state=42, + multi_class='ovr') clf_sw_lbfgs.fit(X, y, sample_weight=sample_weight) - clf_sw_n = LR(solver='newton-cg', fit_intercept=False, random_state=42) + clf_sw_n = LR(solver='newton-cg', fit_intercept=False, random_state=42, + multi_class='ovr') clf_sw_n.fit(X, y, sample_weight=sample_weight) clf_sw_sag = LR(solver='sag', fit_intercept=False, tol=1e-10, - random_state=42) + random_state=42, multi_class='ovr') # ignore convergence warning due to small dataset with ignore_warnings(): clf_sw_sag.fit(X, y, sample_weight=sample_weight) clf_sw_liblinear = LR(solver='liblinear', fit_intercept=False, - random_state=42) + random_state=42, multi_class='ovr') clf_sw_liblinear.fit(X, y, sample_weight=sample_weight) assert_array_almost_equal( clf_sw_lbfgs.coef_, clf_sw_n.coef_, decimal=4) @@ -818,9 +872,11 @@ def test_logistic_regression_sample_weights(): # to be 2 for all instances of class 2 for solver in ['lbfgs', 'liblinear']: clf_cw_12 = LR(solver=solver, fit_intercept=False, - class_weight={0: 1, 1: 2}, random_state=42) + class_weight={0: 1, 1: 2}, random_state=42, + multi_class='ovr') clf_cw_12.fit(X, y) - clf_sw_12 = LR(solver=solver, fit_intercept=False, random_state=42) + clf_sw_12 = LR(solver=solver, fit_intercept=False, random_state=42, + multi_class='ovr') clf_sw_12.fit(X, y, sample_weight=sample_weight) assert_array_almost_equal( clf_cw_12.coef_, clf_sw_12.coef_, decimal=4) @@ -829,21 +885,21 @@ def test_logistic_regression_sample_weights(): # since the patched liblinear code is different. 
     clf_cw = LogisticRegression(
         solver="liblinear", fit_intercept=False, class_weight={0: 1, 1: 2},
-        penalty="l1", tol=1e-5, random_state=42)
+        penalty="l1", tol=1e-5, random_state=42, multi_class='ovr')
     clf_cw.fit(X, y)
     clf_sw = LogisticRegression(
         solver="liblinear", fit_intercept=False, penalty="l1", tol=1e-5,
-        random_state=42)
+        random_state=42, multi_class='ovr')
     clf_sw.fit(X, y, sample_weight)
     assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)
 
     clf_cw = LogisticRegression(
         solver="liblinear", fit_intercept=False, class_weight={0: 1, 1: 2},
-        penalty="l2", dual=True, random_state=42)
+        penalty="l2", dual=True, random_state=42, multi_class='ovr')
     clf_cw.fit(X, y)
     clf_sw = LogisticRegression(
         solver="liblinear", fit_intercept=False, penalty="l2", dual=True,
-        random_state=42)
+        random_state=42, multi_class='ovr')
     clf_sw.fit(X, y, sample_weight)
     assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)
@@ -974,7 +1030,8 @@ def test_liblinear_decision_function_zero():
     # See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600
     # and the PR https://github.com/scikit-learn/scikit-learn/pull/3623
     X, y = make_classification(n_samples=5, n_features=5, random_state=0)
-    clf = LogisticRegression(fit_intercept=False)
+    clf = LogisticRegression(fit_intercept=False, solver='liblinear',
+                             multi_class='ovr')
     clf.fit(X, y)
 
     # Dummy data such that the decision function becomes zero.
@@ -987,10 +1044,11 @@ def test_liblinear_logregcv_sparse():
     # Test LogRegCV with solver='liblinear' works for sparse matrices
     X, y = make_classification(n_samples=10, n_features=5, random_state=0)
-    clf = LogisticRegressionCV(solver='liblinear')
+    clf = LogisticRegressionCV(solver='liblinear', multi_class='ovr')
     clf.fit(sparse.csr_matrix(X), y)
 
 
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_saga_sparse():
     # Test LogRegCV with solver='liblinear' works for sparse matrices
@@ -1004,13 +1062,16 @@ def test_logreg_intercept_scaling():
     # Test that the right error message is thrown when intercept_scaling <= 0
     for i in [-1, 0]:
-        clf = LogisticRegression(intercept_scaling=i)
+        clf = LogisticRegression(intercept_scaling=i, solver='liblinear',
+                                 multi_class='ovr')
         msg = ('Intercept scaling is %r but needs to be greater than 0.'
                ' To disable fitting an intercept,'
                ' set fit_intercept=False.' % clf.intercept_scaling)
         assert_raise_message(ValueError, msg, clf.fit, X, Y1)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_logreg_intercept_scaling_zero():
     # Test that intercept_scaling is ignored when fit_intercept is False
@@ -1031,12 +1092,12 @@ def test_logreg_l1():
     X_constant = np.ones(shape=(n_samples, 2))
     X = np.concatenate((X, X_noise, X_constant), axis=1)
     lr_liblinear = LogisticRegression(penalty="l1", C=1.0, solver='liblinear',
-                                      fit_intercept=False,
+                                      fit_intercept=False, multi_class='ovr',
                                       tol=1e-10)
     lr_liblinear.fit(X, y)
 
     lr_saga = LogisticRegression(penalty="l1", C=1.0, solver='saga',
-                                 fit_intercept=False,
+                                 fit_intercept=False, multi_class='ovr',
                                  max_iter=1000, tol=1e-10)
     lr_saga.fit(X, y)
     assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)
@@ -1062,12 +1123,12 @@ def test_logreg_l1_sparse_data():
     X = sparse.csr_matrix(X)
 
     lr_liblinear = LogisticRegression(penalty="l1", C=1.0, solver='liblinear',
-                                      fit_intercept=False,
+                                      fit_intercept=False, multi_class='ovr',
                                       tol=1e-10)
     lr_liblinear.fit(X, y)
 
     lr_saga = LogisticRegression(penalty="l1", C=1.0, solver='saga',
-                                 fit_intercept=False,
+                                 fit_intercept=False, multi_class='ovr',
                                  max_iter=1000, tol=1e-10)
     lr_saga.fit(X, y)
     assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)
@@ -1078,19 +1139,20 @@ def test_logreg_l1_sparse_data():
     # Check that solving on the sparse and dense data yield the same results
     lr_saga_dense = LogisticRegression(penalty="l1", C=1.0, solver='saga',
-                                       fit_intercept=False,
+                                       fit_intercept=False, multi_class='ovr',
                                        max_iter=1000, tol=1e-10)
     lr_saga_dense.fit(X.toarray(), y)
     assert_array_almost_equal(lr_saga.coef_, lr_saga_dense.coef_)
 
 
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_logreg_cv_penalty():
     # Test that the correct penalty is passed to the final fit.
     X, y = make_classification(n_samples=50, n_features=20, random_state=0)
-    lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear')
+    lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='saga')
     lr_cv.fit(X, y)
-    lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
+    lr = LogisticRegression(penalty="l1", C=1.0, solver='saga')
     lr.fit(X, y)
     assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_))
@@ -1327,3 +1389,46 @@ def test_logistic_regression_path_coefs_multinomial():
     assert_array_almost_equal(coefs[0], coefs[2], decimal=1)
     with pytest.raises(AssertionError):
         assert_array_almost_equal(coefs[1], coefs[2], decimal=1)
+
+
+@pytest.mark.parametrize('est', [LogisticRegression(random_state=0),
+                                 LogisticRegressionCV(random_state=0, cv=3),
+                                 ])
+@pytest.mark.parametrize('solver', ['liblinear', 'lbfgs', 'newton-cg', 'sag',
+                                    'saga'])
+def test_logistic_regression_multi_class_auto(est, solver):
+    # check multi_class='auto' => multi_class='ovr' iff binary y or liblinear
+
+    def fit(X, y, **kw):
+        return clone(est).set_params(**kw).fit(X, y)
+
+    X = iris.data[::10]
+    X2 = iris.data[1::10]
+    y_multi = iris.target[::10]
+    y_bin = y_multi == 0
+    est_auto_bin = fit(X, y_bin, multi_class='auto', solver=solver)
+    est_ovr_bin = fit(X, y_bin, multi_class='ovr', solver=solver)
+    assert np.allclose(est_auto_bin.coef_, est_ovr_bin.coef_)
+    assert np.allclose(est_auto_bin.predict_proba(X2),
+                       est_ovr_bin.predict_proba(X2))
+
+    est_auto_multi = fit(X, y_multi, multi_class='auto', solver=solver)
+    if solver == 'liblinear':
+        est_ovr_multi = fit(X, y_multi, multi_class='ovr', solver=solver)
+        assert np.allclose(est_auto_multi.coef_, est_ovr_multi.coef_)
+        assert np.allclose(est_auto_multi.predict_proba(X2),
+                           est_ovr_multi.predict_proba(X2))
+    else:
+        est_multi_multi = fit(X, y_multi, multi_class='multinomial',
+                              solver=solver)
+        assert np.allclose(est_auto_multi.coef_, est_multi_multi.coef_)
+        assert np.allclose(est_auto_multi.predict_proba(X2),
+                           est_multi_multi.predict_proba(X2))
+
+    # Make sure multi_class='ovr' is distinct from ='multinomial'
+    assert not np.allclose(est_auto_bin.coef_,
+                           fit(X, y_bin, multi_class='multinomial',
+                               solver=solver).coef_)
+    assert not np.allclose(est_auto_bin.coef_,
+                           fit(X, y_multi, multi_class='multinomial',
+                               solver=solver).coef_)
diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py
index ca99a81a73963..8f4dbc8794fc8 100644
--- a/sklearn/linear_model/tests/test_sag.py
+++ b/sklearn/linear_model/tests/test_sag.py
@@ -247,7 +247,8 @@ def test_classifier_matching():
     n_iter = 300
     clf = LogisticRegression(solver=solver, fit_intercept=fit_intercept,
                              tol=1e-11, C=1. / alpha / n_samples,
-                             max_iter=n_iter, random_state=10)
+                             max_iter=n_iter, random_state=10,
+                             multi_class='ovr')
     clf.fit(X, y)
 
     weights, intercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
@@ -311,11 +312,12 @@ def test_sag_pobj_matches_logistic_regression():
     clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                               C=1. / alpha / n_samples, max_iter=max_iter,
-                              random_state=10)
+                              random_state=10, multi_class='ovr')
     clf2 = clone(clf1)
     clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                               C=1. / alpha / n_samples, max_iter=max_iter,
-                              random_state=10)
+                              random_state=10, multi_class='ovr',
+                              solver='lbfgs')
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -507,7 +509,7 @@ def test_sag_classifier_computed_correctly():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=n_iter, tol=tol, random_state=77,
-                              fit_intercept=fit_intercept)
+                              fit_intercept=fit_intercept, multi_class='ovr')
     clf2 = clone(clf1)
 
     clf1.fit(X, y)
@@ -547,7 +549,7 @@ def test_sag_multiclass_computed_correctly():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=max_iter, tol=tol, random_state=77,
-                              fit_intercept=fit_intercept)
+                              fit_intercept=fit_intercept, multi_class='ovr')
     clf2 = clone(clf1)
 
     clf1.fit(X, y)
@@ -591,6 +593,7 @@ def test_sag_multiclass_computed_correctly():
         assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
 
 
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_classifier_results():
     """tests if classifier results match target"""
     alpha = .1
@@ -634,7 +637,7 @@ def test_binary_classifier_class_weight():
     class_weight = {1: .45, -1: .55}
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=n_iter, tol=tol, random_state=77,
-                              fit_intercept=fit_intercept,
+                              fit_intercept=fit_intercept, multi_class='ovr',
                               class_weight=class_weight)
     clf2 = clone(clf1)
 
@@ -681,7 +684,7 @@ def test_multiclass_classifier_class_weight():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=max_iter, tol=tol, random_state=77,
-                              fit_intercept=fit_intercept,
+                              fit_intercept=fit_intercept, multi_class='ovr',
                               class_weight=class_weight)
     clf2 = clone(clf1)
     clf1.fit(X, y)
@@ -728,6 +731,7 @@ def test_multiclass_classifier_class_weight():
         assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
 
 
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_classifier_single_class():
     """tests if ValueError is thrown with only one class"""
     X = [[1, 2], [3, 4]]
@@ -740,6 +744,7 @@ def test_classifier_single_class():
                          X, y)
 
 
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_step_size_alpha_error():
     X = [[0, 0], [0, 0]]
     y = [1, -1]
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index f418a9375d993..da04b4215dce0 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -334,6 +334,8 @@ def test_regression_scorers():
     assert_almost_equal(score1, score2)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_thresholded_scorers():
     # Test scorers that take thresholds.
     X, y = make_blobs(random_state=0, centers=2)
@@ -504,6 +506,8 @@ def test_scorer_memmap_input(name):
     check_scorer_memmap(name)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_scoring_is_not_metric():
     assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
                          LogisticRegression(), f1_score)
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index bb7e736eb3c3b..0d7a05f39d714 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -790,6 +790,8 @@ def test_cross_val_score_multilabel():
     assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict():
     boston = load_boston()
@@ -840,6 +842,8 @@ def split(self, X, y=None, groups=None):
                   X, y, method='predict_proba', cv=KFold(2))
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict_decision_function_shape():
     X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
@@ -887,6 +891,8 @@ def test_cross_val_predict_decision_function_shape():
                   cv=KFold(n_splits=3), method='decision_function')
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict_predict_proba_shape():
     X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
@@ -902,6 +908,8 @@ def test_cross_val_predict_predict_proba_shape():
     assert_equal(preds.shape, (150, 3))
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict_predict_log_proba_shape():
     X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
@@ -917,6 +925,8 @@ def test_cross_val_predict_predict_log_proba_shape():
     assert_equal(preds.shape, (150, 3))
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict_input_types():
     iris = load_iris()
@@ -1336,6 +1346,8 @@ def check_cross_val_predict_with_method(est):
     assert_array_equal(predictions, predictions_ystr)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict_with_method():
     check_cross_val_predict_with_method(LogisticRegression())
@@ -1350,6 +1362,8 @@ def test_cross_val_predict_method_checking():
     check_cross_val_predict_with_method(est)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: The default of the `iid`')
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_gridsearchcv_cross_val_predict_with_method():
@@ -1379,11 +1393,13 @@ get_expected_predictions(X, y, cv, classes, est, method):
     return expected_predictions
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_cross_val_predict_class_subset():
 
     X = np.arange(200).reshape(100, 2)
-    y = np.array([x//10 for x in range(100)])
+    y = np.array([x // 10 for x in range(100)])
     classes = 10
 
     kfold3 = KFold(n_splits=3)
diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py
index d02c53b05d8b7..fffd7fc787938 100644
--- a/sklearn/svm/tests/test_bounds.py
+++ b/sklearn/svm/tests/test_bounds.py
@@ -45,7 +45,8 @@ def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None):
     min_c = l1_min_c(X, y, loss, fit_intercept, intercept_scaling)
 
     clf = {
-        'log': LogisticRegression(penalty='l1'),
+        'log': LogisticRegression(penalty='l1', solver='liblinear',
+                                  multi_class='ovr'),
         'squared_hinge': LinearSVC(loss='squared_hinge',
                                    penalty='l1', dual=False),
     }[loss]
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index 5fa83050a98f1..8a3ac8423f7d6 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -1,3 +1,4 @@
+import pytest
 import numpy as np
 from scipy import sparse
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
@@ -234,6 +235,8 @@ def test_linearsvc_iris():
     assert_array_equal(pred, sp_clf.predict(iris.data))
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_weight():
     # Test class weights
     X_, y_ = make_classification(n_samples=200, n_features=100,
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 6187a08f7b757..4a8e4ef735888 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -5,6 +5,8 @@
 """
 import numpy as np
 import itertools
+import pytest
+
 from numpy.testing import assert_array_equal, assert_array_almost_equal
 from numpy.testing import assert_almost_equal
 from numpy.testing import assert_allclose
@@ -403,6 +405,8 @@ def test_svr_predict():
     assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel())
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_weight():
     # Test class weights
     clf = svm.SVC(gamma='scale', class_weight={1: 0.1})
@@ -442,6 +446,8 @@ def test_sample_weights():
     assert_array_almost_equal(dual_coef_no_weight, clf.dual_coef_)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @ignore_warnings(category=UndefinedMetricWarning)
 def test_auto_weight():
     # Test class weights for imbalanced data
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index 08c3b9f01e163..130c43b3ebeb2 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -187,6 +187,8 @@ def test_ovr_fit_predict_sparse():
         assert_array_equal(dec_pred, clf_sprs.predict(X_test).toarray())
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_ovr_always_present():
     # Test that ovr works with classes that are always present or absent.
     # Note: tests is the case where _ConstantPredictor is utilised
@@ -244,6 +246,8 @@ def test_ovr_multiclass():
     assert_array_equal(y_pred, [0, 0, 1])
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_ovr_binary():
     # Toy dataset where features correspond directly to labels.
     X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]])
diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py
index 83e3794d78870..1eb5a7e48f823 100644
--- a/sklearn/tests/test_multioutput.py
+++ b/sklearn/tests/test_multioutput.py
@@ -1,5 +1,6 @@
 from __future__ import division
 
+import pytest
 import numpy as np
 import scipy.sparse as sp
@@ -277,7 +278,8 @@ def test_multiclass_multioutput_estimator_predict_proba():
     Y = np.concatenate([y1, y2], axis=1)
 
-    clf = MultiOutputClassifier(LogisticRegression(random_state=seed))
+    clf = MultiOutputClassifier(LogisticRegression(
+        multi_class='ovr', solver='liblinear', random_state=seed))
 
     clf.fit(X, Y)
@@ -383,6 +385,8 @@ def test_classifier_chain_fit_and_predict_with_linear_svc():
     assert not hasattr(classifier_chain, 'predict_proba')
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_classifier_chain_fit_and_predict_with_sparse_data():
     # Fit classifier chain with sparse data
     X, Y = generate_multilabel_dataset_with_correlations()
@@ -399,6 +403,8 @@ def test_classifier_chain_fit_and_predict_with_sparse_data():
     assert_array_equal(Y_pred_sparse, Y_pred_dense)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_classifier_chain_vs_independent_models():
     # Verify that an ensemble of classifier chains (each of length
     # N) can achieve a higher Jaccard similarity score than N independent
@@ -421,6 +427,8 @@ def test_classifier_chain_vs_independent_models():
                    jaccard_similarity_score(Y_test, Y_pred_ovr))
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_base_chain_fit_and_predict():
     # Fit base chain and verify predict performance
     X, Y = generate_multilabel_dataset_with_correlations()
@@ -440,6 +448,8 @@ def test_base_chain_fit_and_predict():
     assert isinstance(chains[1], ClassifierMixin)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_base_chain_fit_and_predict_with_sparse_data_and_cv():
     # Fit base chain with sparse data cross_val_predict
     X, Y = generate_multilabel_dataset_with_correlations()
@@ -452,6 +462,8 @@ def test_base_chain_fit_and_predict_with_sparse_data_and_cv():
     assert_equal(Y_pred.shape, Y.shape)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_base_chain_random_order():
     # Fit base chain with random order
     X, Y = generate_multilabel_dataset_with_correlations()
@@ -472,6 +484,8 @@ def test_base_chain_random_order():
         assert_array_almost_equal(est1.coef_, est2.coef_)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_base_chain_crossval_fit_and_predict():
     # Fit chain with cross_val_predict and verify predict
     # performance
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index a36d3e17e31e9..8a15238ede1d3 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -6,6 +6,7 @@
 import shutil
 import time
 
+import pytest
 import numpy as np
 from scipy import sparse
@@ -234,6 +235,8 @@ def test_pipeline_init_tuple():
     pipe.score(X)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_pipeline_methods_anova():
     # Test the various methods of the pipeline (anova).
     iris = load_iris()
@@ -784,6 +787,8 @@ def test_feature_union_feature_names():
                          'get_feature_names', ft.get_feature_names)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_classes_property():
     iris = load_iris()
     X = iris.data
@@ -887,6 +892,8 @@ def test_step_name_validation():
                                  [[1]], [1])
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_set_params_nested_pipeline():
     estimator = Pipeline([
         ('a', Pipeline([
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
index 1dfedad9bcd7c..c2d03595fb860 100644
--- a/sklearn/utils/tests/test_class_weight.py
+++ b/sklearn/utils/tests/test_class_weight.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 from sklearn.linear_model import LogisticRegression
 from sklearn.datasets import make_blobs
@@ -65,6 +66,8 @@ def test_compute_class_weight_dict():
                   classes, y)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_compute_class_weight_invariance():
     # Test that results with class_weight="balanced" is invariant wrt
     # class imbalance if the number of samples is identical.