From 5dd561a918168f344a20d209d7bcdd7bb90e951a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 12:24:29 -0500 Subject: [PATCH 01/23] removed warn_on_dtype --- sklearn/utils/tests/test_validation.py | 84 -------------------------- sklearn/utils/validation.py | 44 +------------- 2 files changed, 2 insertions(+), 126 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 56efb98a8b2d8..d97f0e0f08846 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -412,55 +412,20 @@ def test_check_array_dtype_stability(): def test_check_array_dtype_warning(): X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - X_float64 = np.asarray(X_int_list, dtype=np.float64) X_float32 = np.asarray(X_int_list, dtype=np.float32) X_int64 = np.asarray(X_int_list, dtype=np.int64) - X_csr_float64 = sp.csr_matrix(X_float64) X_csr_float32 = sp.csr_matrix(X_float32) X_csc_float32 = sp.csc_matrix(X_float32) X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32) - y = [0, 0, 1] integer_data = [X_int64, X_csc_int32] - float64_data = [X_float64, X_csr_float64] float32_data = [X_float32, X_csr_float32, X_csc_float32] for X in integer_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True) assert X_checked.dtype == np.float64 - - X_checked = assert_warns(DataConversionWarning, check_array, X, - dtype=np.float64, - accept_sparse=True, warn_on_dtype=True) - assert X_checked.dtype == np.float64 - - # Check that the warning message includes the name of the Estimator - X_checked = assert_warns_message(DataConversionWarning, - 'SomeEstimator', - check_array, X, - dtype=[np.float64, np.float32], - accept_sparse=True, - warn_on_dtype=True, - estimator='SomeEstimator') assert X_checked.dtype == np.float64 - - X_checked, y_checked = assert_warns_message( - DataConversionWarning, 'KNeighborsClassifier', - check_X_y, X, y, dtype=np.float64, accept_sparse=True, - 
warn_on_dtype=True, estimator=KNeighborsClassifier()) - assert X_checked.dtype == np.float64 - for X in float64_data: - with pytest.warns(None) as record: - warnings.simplefilter("ignore", FutureWarning) # 0.23 - X_checked = check_array(X, dtype=np.float64, - accept_sparse=True, warn_on_dtype=True) - assert X_checked.dtype == np.float64 - X_checked = check_array(X, dtype=np.float64, - accept_sparse=True, warn_on_dtype=False) - assert X_checked.dtype == np.float64 - assert len(record) == 0 - for X in float32_data: X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], @@ -484,17 +449,6 @@ def test_check_array_dtype_warning(): assert X_checked.format == 'csr' -def test_check_array_warn_on_dtype_deprecation(): - X = np.asarray([[0.0], [1.0]]) - Y = np.asarray([[2.0], [3.0]]) - with pytest.warns(FutureWarning, - match="'warn_on_dtype' is deprecated"): - check_array(X, warn_on_dtype=True) - with pytest.warns(FutureWarning, - match="'warn_on_dtype' is deprecated"): - check_X_y(X, Y, warn_on_dtype=True) - - def test_check_array_accept_sparse_type_exception(): X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) @@ -788,44 +742,6 @@ def test_check_array_series(): assert_array_equal(res, np.array(['a', 'b', 'c'], dtype=object)) -def test_check_dataframe_warns_on_dtype(): - # Check that warn_on_dtype also works for DataFrames. 
- # https://github.com/scikit-learn/scikit-learn/issues/10948 - pd = importorskip("pandas") - - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object) - assert_warns_message(DataConversionWarning, - "Data with input dtype object were all converted to " - "float64.", - check_array, df, dtype=np.float64, warn_on_dtype=True) - assert_warns(DataConversionWarning, check_array, df, - dtype='numeric', warn_on_dtype=True) - with pytest.warns(None) as record: - warnings.simplefilter("ignore", FutureWarning) # 0.23 - check_array(df, dtype='object', warn_on_dtype=True) - assert len(record) == 0 - - # Also check that it raises a warning for mixed dtypes in a DataFrame. - df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]]) - assert_warns(DataConversionWarning, check_array, df_mixed, - dtype=np.float64, warn_on_dtype=True) - assert_warns(DataConversionWarning, check_array, df_mixed, - dtype='numeric', warn_on_dtype=True) - assert_warns(DataConversionWarning, check_array, df_mixed, - dtype=object, warn_on_dtype=True) - - # Even with numerical dtypes, a conversion can be made because dtypes are - # uniformized throughout the array. 
- df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]]) - assert_warns(DataConversionWarning, check_array, df_mixed_numeric, - dtype='numeric', warn_on_dtype=True) - with pytest.warns(None) as record: - warnings.simplefilter("ignore", FutureWarning) # 0.23 - check_array(df_mixed_numeric.astype(int), - dtype='numeric', warn_on_dtype=True) - assert len(record) == 0 - - class DummyMemory: def cache(self, func): return func diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 424cf4b5180a3..bc92814c4b57a 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -339,7 +339,7 @@ def _ensure_no_complex_data(array): def check_array(array, accept_sparse=False, accept_large_sparse=True, dtype="numeric", order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, - ensure_min_features=1, warn_on_dtype=None, estimator=None): + ensure_min_features=1, estimator=None): """Input validation on an array, list, sparse matrix or similar. @@ -414,14 +414,6 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check. - warn_on_dtype : boolean or None, optional (default=None) - Raise DataConversionWarning if the dtype of the input data structure - does not match the requested dtype, causing a memory copy. - - .. deprecated:: 0.21 - ``warn_on_dtype`` is deprecated in version 0.21 and will be - removed in 0.23. - estimator : str or estimator instance (default=None) If passed, include the name of the estimator in warning messages. @@ -430,14 +422,6 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, array_converted : object The converted and validated array. """ - # warn_on_dtype deprecation - if warn_on_dtype is not None: - warnings.warn( - "'warn_on_dtype' is deprecated in version 0.21 and will be " - "removed in 0.23. 
Don't set `warn_on_dtype` to remove this " - "warning.", - FutureWarning, stacklevel=2) - # store reference to original array to check if copy is needed when # function returns array_orig = array @@ -577,24 +561,9 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, % (n_features, array.shape, ensure_min_features, context)) - if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig: - msg = ("Data with input dtype %s was converted to %s%s." - % (dtype_orig, array.dtype, context)) - warnings.warn(msg, DataConversionWarning, stacklevel=2) - if copy and np.may_share_memory(array, array_orig): array = np.array(array, dtype=dtype, order=order) - if (warn_on_dtype and dtypes_orig is not None and - {array.dtype} != set(dtypes_orig)): - # if there was at the beginning some other types than the final one - # (for instance in a DataFrame that can contain several dtypes) then - # some data must have been converted - msg = ("Data with input dtype %s were all converted to %s%s." - % (', '.join(map(str, sorted(set(dtypes_orig)))), array.dtype, - context)) - warnings.warn(msg, DataConversionWarning, stacklevel=3) - return array @@ -621,7 +590,7 @@ def check_X_y(X, y, accept_sparse=False, accept_large_sparse=True, dtype="numeric", order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, multi_output=False, ensure_min_samples=1, ensure_min_features=1, y_numeric=False, - warn_on_dtype=None, estimator=None): + estimator=None): """Input validation for standard estimators. Checks X and y for consistent length, enforces X to be 2D and y 1D. By @@ -706,14 +675,6 @@ def check_X_y(X, y, accept_sparse=False, accept_large_sparse=True, it is converted to float64. Should only be used for regression algorithms. - warn_on_dtype : boolean or None, optional (default=None) - Raise DataConversionWarning if the dtype of the input data structure - does not match the requested dtype, causing a memory copy. - - .. 
deprecated:: 0.21 - ``warn_on_dtype`` is deprecated in version 0.21 and will be - removed in 0.23. - estimator : str or estimator instance (default=None) If passed, include the name of the estimator in warning messages. @@ -735,7 +696,6 @@ def check_X_y(X, y, accept_sparse=False, accept_large_sparse=True, ensure_2d=ensure_2d, allow_nd=allow_nd, ensure_min_samples=ensure_min_samples, ensure_min_features=ensure_min_features, - warn_on_dtype=warn_on_dtype, estimator=estimator) if multi_output: y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False, From e05e17a98853e6f9f04b0e42eebafffc8f7cc881 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 12:26:28 -0500 Subject: [PATCH 02/23] removed parameters to check_is_fitted --- sklearn/utils/tests/test_validation.py | 10 ---------- sklearn/utils/validation.py | 21 +-------------------- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index d97f0e0f08846..1ce974dac45bd 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -679,16 +679,6 @@ def test_check_is_fitted(): assert check_is_fitted(ard) is None assert check_is_fitted(svr) is None - # to be removed in 0.23 - assert_warns_message( - FutureWarning, - "Passing attributes to check_is_fitted is deprecated", - check_is_fitted, ard, ['coef_']) - assert_warns_message( - FutureWarning, - "Passing all_or_any to check_is_fitted is deprecated", - check_is_fitted, ard, all_or_any=any) - def test_check_consistent_length(): check_consistent_length([1], [2], [3], [4], [5]) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index bc92814c4b57a..57509a33ba373 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -845,8 +845,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, attributes='deprecated', msg=None, - 
all_or_any='deprecated'): +def check_is_fitted(estimator, msg=None): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of @@ -858,11 +857,6 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, estimator : estimator instance. estimator instance for which the check is performed. - attributes : deprecated, ignored - .. deprecated:: 0.22 - `attributes` is deprecated, is currently ignored and will be removed - in 0.23. - msg : string The default error message is, "This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this @@ -873,11 +867,6 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, Eg. : "Estimator, %(name)s, must be fitted before sparsifying". - all_or_any : deprecated, ignored - .. deprecated:: 0.21 - `all_or_any` is deprecated, is currently ignored and will be removed - in 0.23. - Returns ------- None @@ -887,14 +876,6 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, NotFittedError If the attributes are not found. """ - if attributes != 'deprecated': - warnings.warn("Passing attributes to check_is_fitted is deprecated" - " and will be removed in 0.23. The attributes " - "argument is ignored.", FutureWarning) - if all_or_any != 'deprecated': - warnings.warn("Passing all_or_any to check_is_fitted is deprecated" - " and will be removed in 0.23. 
The any_or_all " - "argument is ignored.", FutureWarning) if isclass(estimator): raise TypeError("{} is a class, not an instance.".format(estimator)) if msg is None: From cdfac1ef341ace3372a1d1b4d0e2c01e6bfe1530 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 14:21:59 -0500 Subject: [PATCH 03/23] all_estimators parameters --- sklearn/utils/__init__.py | 40 +-------------------------------------- sklearn/utils/_testing.py | 39 +------------------------------------- 2 files changed, 2 insertions(+), 77 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 4d4ef606341ca..c864911626b27 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1108,9 +1108,7 @@ def check_pandas_support(caller_name): ) from e -def all_estimators(include_meta_estimators=None, - include_other=None, type_filter=None, - include_dont_test=None): +def all_estimators(type_filter=None): """Get a list of all estimators from sklearn. This function crawls the module and gets all classes that inherit @@ -1120,20 +1118,6 @@ def all_estimators(include_meta_estimators=None, Parameters ---------- - include_meta_estimators : boolean, default=False - Deprecated, ignored. - - .. deprecated:: 0.21 - ``include_meta_estimators`` has been deprecated and has no effect in - 0.21 and will be removed in 0.23. - - include_other : boolean, default=False - Deprecated, ignored. - - .. deprecated:: 0.21 - ``include_other`` has been deprecated and has not effect in 0.21 and - will be removed in 0.23. - type_filter : string, list of string, or None, default=None Which kind of estimators should be returned. If None, no filter is applied and all estimators are returned. Possible values are @@ -1141,13 +1125,6 @@ def all_estimators(include_meta_estimators=None, estimators only of these specific types, or a list of these to get the estimators that fit at least one of the types. - include_dont_test : boolean, default=False - Deprecated, ignored. - - .. 
deprecated:: 0.21 - ``include_dont_test`` has been deprecated and has no effect in 0.21 - and will be removed in 0.23. - Returns ------- estimators : list of tuples @@ -1167,21 +1144,6 @@ def is_abstract(c): return False return True - if include_other is not None: - warnings.warn("include_other was deprecated in version 0.21," - " has no effect and will be removed in 0.23", - DeprecationWarning) - - if include_dont_test is not None: - warnings.warn("include_dont_test was deprecated in version 0.21," - " has no effect and will be removed in 0.23", - DeprecationWarning) - - if include_meta_estimators is not None: - warnings.warn("include_meta_estimators was deprecated in version 0.21," - " has no effect and will be removed in 0.23", - DeprecationWarning) - all_classes = [] # get parent folder path = sklearn.__path__ diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 806f302b78288..4e4e6043eae3d 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -438,9 +438,7 @@ def assert_allclose_dense_sparse(x, y, rtol=1e-07, atol=1e-9, err_msg=''): # TODO: Remove in 0.24. This class is now in utils.__init__. -def all_estimators(include_meta_estimators=None, - include_other=None, type_filter=None, - include_dont_test=None): +def all_estimators(type_filter=None): """Get a list of all estimators from sklearn. This function crawls the module and gets all classes that inherit @@ -450,19 +448,6 @@ def all_estimators(include_meta_estimators=None, Parameters ---------- - include_meta_estimators : boolean, default=False - Deprecated, ignored. - - .. deprecated:: 0.21 - ``include_meta_estimators`` has been deprecated and has no effect in - 0.21 and will be removed in 0.23. - - include_other : boolean, default=False - Deprecated, ignored. - - .. deprecated:: 0.21 - ``include_other`` has been deprecated and has not effect in 0.21 and - will be removed in 0.23. 
type_filter : string, list of string, or None, default=None Which kind of estimators should be returned. If None, no filter is @@ -471,13 +456,6 @@ def all_estimators(include_meta_estimators=None, estimators only of these specific types, or a list of these to get the estimators that fit at least one of the types. - include_dont_test : boolean, default=False - Deprecated, ignored. - - .. deprecated:: 0.21 - ``include_dont_test`` has been deprecated and has no effect in 0.21 - and will be removed in 0.23. - Returns ------- estimators : list of tuples @@ -491,21 +469,6 @@ def is_abstract(c): return False return True - if include_other is not None: - warnings.warn("include_other was deprecated in version 0.21," - " has no effect and will be removed in 0.23", - FutureWarning) - - if include_dont_test is not None: - warnings.warn("include_dont_test was deprecated in version 0.21," - " has no effect and will be removed in 0.23", - FutureWarning) - - if include_meta_estimators is not None: - warnings.warn("include_meta_estimators was deprecated in version 0.21," - " has no effect and will be removed in 0.23", - FutureWarning) - all_classes = [] # get parent folder path = sklearn.__path__ From ef5d5708d2bf389611281a82defbe87ab89e7ad0 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 14:23:26 -0500 Subject: [PATCH 04/23] deprecated n_components attribute in AgglomerativeClustering --- sklearn/cluster/_hierarchical.py | 7 ------- sklearn/cluster/tests/test_hierarchical.py | 14 -------------- 2 files changed, 21 deletions(-) diff --git a/sklearn/cluster/_hierarchical.py b/sklearn/cluster/_hierarchical.py index f553a9e505eb5..9cb80747fbc20 100644 --- a/sklearn/cluster/_hierarchical.py +++ b/sklearn/cluster/_hierarchical.py @@ -787,13 +787,6 @@ def __init__(self, n_clusters=2, affinity="euclidean", self.linkage = linkage self.affinity = affinity - @deprecated("The ``n_components_`` attribute was deprecated " - "in favor of ``n_connected_components_`` in 0.21 " - 
"and will be removed in 0.23.") - @property - def n_components_(self): - return self.n_connected_components_ - def fit(self, X, y=None): """Fit the hierarchical clustering from features, or distance matrix. diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 06e2561df5de7..49d102a57e4f3 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -750,17 +750,3 @@ def test_dist_threshold_invalid_parameters(): AgglomerativeClustering(n_clusters=None, distance_threshold=1, compute_full_tree=False).fit(X) - - -def test_n_components_deprecation(): - # Test that a Deprecation warning is thrown when n_components_ - # attribute is accessed - - X = np.array([[1, 2], [1, 4], [1, 0], [4, 2]]) - agc = AgglomerativeClustering().fit(X) - - match = ("``n_components_`` attribute was deprecated " - "in favor of ``n_connected_components_``") - with pytest.warns(FutureWarning, match=match): - n = agc.n_components_ - assert n == agc.n_connected_components_ From 66716828473ba83df208847322a9b76b78a2e54c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 14:38:52 -0500 Subject: [PATCH 05/23] change default of base.score for multioutput --- sklearn/base.py | 28 ++++--------------- sklearn/cross_decomposition/tests/test_pls.py | 1 - .../tests/test_coordinate_descent.py | 1 - sklearn/linear_model/tests/test_ransac.py | 2 -- sklearn/linear_model/tests/test_ridge.py | 1 - sklearn/model_selection/tests/test_search.py | 1 - sklearn/neural_network/tests/test_mlp.py | 1 - sklearn/tests/test_base.py | 23 --------------- sklearn/tests/test_dummy.py | 1 - 9 files changed, 6 insertions(+), 53 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 050bb4e2a522b..1765b3e0941e5 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -406,34 +406,18 @@ def score(self, X, y, sample_weight=None): Notes ----- - The R2 score used when calling ``score`` on a regressor will use + The R2 
score used when calling ``score`` on a regressor uses ``multioutput='uniform_average'`` from version 0.23 to keep consistent - with :func:`~sklearn.metrics.r2_score`. This will influence the - ``score`` method of all the multioutput regressors (except for - :class:`~sklearn.multioutput.MultiOutputRegressor`). To specify the - default value manually and avoid the warning, please either call - :func:`~sklearn.metrics.r2_score` directly or make a custom scorer with - :func:`~sklearn.metrics.make_scorer` (the built-in scorer ``'r2'`` uses - ``multioutput='uniform_average'``). + with default value of :func:`~sklearn.metrics.r2_score`. + This influences the ``score`` method of all the multioutput + regressors (except for + :class:`~sklearn.multioutput.MultiOutputRegressor`). """ from .metrics import r2_score from .metrics._regression import _check_reg_targets y_pred = self.predict(X) - # XXX: Remove the check in 0.23 - y_type, _, _, _ = _check_reg_targets(y, y_pred, None) - if y_type == 'continuous-multioutput': - warnings.warn("The default value of multioutput (not exposed in " - "score method) will change from 'variance_weighted' " - "to 'uniform_average' in 0.23 to keep consistent " - "with 'metrics.r2_score'. 
To specify the default " - "value manually and avoid the warning, please " - "either call 'metrics.r2_score' directly or make a " - "custom scorer with 'metrics.make_scorer' (the " - "built-in scorer 'r2' uses " - "multioutput='uniform_average').", FutureWarning) - return r2_score(y, y_pred, sample_weight=sample_weight, - multioutput='variance_weighted') + return r2_score(y, y_pred, sample_weight=sample_weight) class ClusterMixin: diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 13c55fbd135d0..2d788a2cf6271 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -426,7 +426,6 @@ def test_pls_errors(): clf.fit, X, Y) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 def test_pls_scaling(): # sanity check for scale=True n_samples = 1000 diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index a739c876fa77f..ab9594c4d0567 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -229,7 +229,6 @@ def test_lasso_path_return_models_vs_new_return_gives_same_coefficients(): decimal=1) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 def test_enet_path(): # We use a large number of samples and of informative features so that # the l1_ratio selected is more toward ridge than lasso diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 83f688c95692e..62b68566db22c 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -332,7 +332,6 @@ def test_ransac_min_n_samples(): assert_raises(ValueError, ransac_estimator7.fit, X, y) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 def test_ransac_multi_dimensional_targets(): 
base_estimator = LinearRegression() @@ -353,7 +352,6 @@ def test_ransac_multi_dimensional_targets(): assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 def test_ransac_residual_loss(): loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1) loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index c786b154fcb85..4d17c58ee1176 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -720,7 +720,6 @@ def check_dense_sparse(test_func): assert_array_almost_equal(ret_dense, ret_sparse, decimal=3) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 @pytest.mark.parametrize( 'test_func', (_test_ridge_loo, _test_ridge_cv, _test_ridge_cv_normalize, diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index fc6183f3a1f0b..056927bee75d0 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1358,7 +1358,6 @@ def test_pickle(): random_search_pickled.predict(X)) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 def test_grid_search_with_multioutput_data(): # Test search with multi-output estimator diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 53f69b79edb40..09a01ad69dbdd 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -345,7 +345,6 @@ def test_multilabel_classification(): mlp.fit(X, y).predict(X) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 def test_multioutput_regression(): # Test that multi-output regression works as expected X, y = make_regression(n_samples=200, n_targets=5) diff --git 
a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 155dbcaaa1f6c..f480fffda1571 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -490,29 +490,6 @@ def test_tag_inheritance(): assert inherit_diamond_tag_est._get_tags()['allow_nan'] -# XXX: Remove in 0.23 -def test_regressormixin_score_multioutput(): - from sklearn.linear_model import LinearRegression - # no warnings when y_type is continuous - X = [[1], [2], [3]] - y = [1, 2, 3] - reg = LinearRegression().fit(X, y) - assert_no_warnings(reg.score, X, y) - # warn when y_type is continuous-multioutput - y = [[1, 2], [2, 3], [3, 4]] - reg = LinearRegression().fit(X, y) - msg = ("The default value of multioutput (not exposed in " - "score method) will change from 'variance_weighted' " - "to 'uniform_average' in 0.23 to keep consistent " - "with 'metrics.r2_score'. To specify the default " - "value manually and avoid the warning, please " - "either call 'metrics.r2_score' directly or make a " - "custom scorer with 'metrics.make_scorer' (the " - "built-in scorer 'r2' uses " - "multioutput='uniform_average').") - assert_warns_message(FutureWarning, msg, reg.score, X, y) - - def test_warns_on_get_params_non_attribute(): class MyEstimator(BaseEstimator): def __init__(self, param=5): diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 55f3abc77b0de..0d4addb48e64d 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -708,7 +708,6 @@ def test_dummy_regressor_return_std(): assert_array_equal(y_pred_list[1], y_std_expected) -@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 @pytest.mark.parametrize("y,y_test", [ ([1, 1, 1, 2], [1.25] * 4), (np.array([[2, 2], From b5fe8114c07b36b228c2b5b9f0906a50f855ac16 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 14:54:14 -0500 Subject: [PATCH 06/23] removed lots of useless decorators? 
--- .../decomposition/tests/test_kernel_pca.py | 6 ----- .../tests/test_from_model.py | 8 ------- sklearn/linear_model/tests/test_huber.py | 2 -- .../tests/test_passive_aggressive.py | 24 ------------------- sklearn/linear_model/tests/test_perceptron.py | 4 ---- sklearn/linear_model/tests/test_sgd.py | 5 ---- .../model_selection/tests/test_validation.py | 5 ---- sklearn/tests/test_multiclass.py | 6 ----- sklearn/tests/test_multioutput.py | 10 -------- 9 files changed, 70 deletions(-) diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 39fc16b5ff5fb..a08ae0cb7a43a 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -215,8 +215,6 @@ def test_kernel_pca_invalid_kernel(): kpca.fit(X_fit) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_gridsearch_pipeline(): # Test if we can do a grid-search to find parameters to separate # circles with a perceptron model. @@ -231,8 +229,6 @@ def test_gridsearch_pipeline(): assert grid_search.best_score_ == 1 -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_gridsearch_pipeline_precomputed(): # Test if we can do a grid-search to find parameters to separate # circles with a perceptron model using a precomputed kernel. @@ -248,8 +244,6 @@ def test_gridsearch_pipeline_precomputed(): assert grid_search.best_score_ == 1 -# 0.23. warning about tol not having its correct default value. 
-@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_nested_circles(): # Test the linear separability of the first 2D KPCA transform X, y = make_circles(n_samples=400, factor=.3, noise=.05, diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 57bd88a30eb0e..89c1777b8c32c 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -37,8 +37,6 @@ def _more_tags(self): rng = np.random.RandomState(0) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_invalid_input(): clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True, random_state=None, tol=None) @@ -252,8 +250,6 @@ def test_2d_coef(): assert_array_almost_equal(X_new, X[:, feature_mask]) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_partial_fit(): est = PassiveAggressiveClassifier(random_state=0, shuffle=False, max_iter=5, tol=None) @@ -284,8 +280,6 @@ def test_calling_fit_reinitializes(): assert transformer.estimator_.C == 100 -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_prefit(): # Test all possible combinations of the prefit parameter. @@ -325,8 +319,6 @@ def test_threshold_string(): assert_array_almost_equal(X_transform, data[:, mask]) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_threshold_without_refitting(): # Test that the threshold can be set without refitting the model. 
clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True, diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 78fa0f3b1cd14..cb70db88d3d41 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -143,8 +143,6 @@ def test_huber_scaling_invariant(): assert_array_equal(n_outliers_mask_3, n_outliers_mask_1) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_huber_and_sgd_same_results(): # Test they should converge to same coefficients for same parameters diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index 5da9883cba369..34fe8334211b4 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -67,8 +67,6 @@ def project(self, X): return np.dot(X, self.w) + self.b -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_classifier_accuracy(): for data in (X, X_csr): for fit_intercept in (True, False): @@ -86,8 +84,6 @@ def test_classifier_accuracy(): assert hasattr(clf, 'standard_coef_') -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_classifier_partial_fit(): classes = np.unique(y) for data in (X, X_csr): @@ -105,8 +101,6 @@ def test_classifier_partial_fit(): assert hasattr(clf, 'standard_coef_') -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_classifier_refit(): # Classifier can be retrained on different labels and features. 
clf = PassiveAggressiveClassifier(max_iter=5).fit(X, y) @@ -116,8 +110,6 @@ def test_classifier_refit(): assert_array_equal(clf.classes_, iris.target_names) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') @pytest.mark.parametrize('loss', ("hinge", "squared_hinge")) def test_classifier_correctness(loss): y_bin = y.copy() @@ -140,8 +132,6 @@ def test_classifier_undefined_methods(): assert_raises(AttributeError, lambda x: getattr(clf, x), meth) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_class_weights(): # Test class weights. X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], @@ -164,16 +154,12 @@ def test_class_weights(): assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1])) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_partial_fit_weight_class_balanced(): # partial_fit with class_weight='balanced' not supported clf = PassiveAggressiveClassifier(class_weight="balanced", max_iter=100) assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y)) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_equal_class_weight(): X2 = [[1, 0], [1, 0], [0, 1], [0, 1]] y2 = [0, 0, 1, 1] @@ -195,8 +181,6 @@ def test_equal_class_weight(): assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_wrong_class_weight_label(): # ValueError due to wrong class_weight label. 
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], @@ -207,8 +191,6 @@ def test_wrong_class_weight_label(): assert_raises(ValueError, clf.fit, X2, y2) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_wrong_class_weight_format(): # ValueError due to wrong class_weight argument type. X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], @@ -222,8 +204,6 @@ def test_wrong_class_weight_format(): assert_raises(ValueError, clf.fit, X2, y2) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_regressor_mse(): y_bin = y.copy() y_bin[y != 1] = -1 @@ -244,8 +224,6 @@ def test_regressor_mse(): assert hasattr(reg, 'standard_coef_') -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_regressor_partial_fit(): y_bin = y.copy() y_bin[y != 1] = -1 @@ -265,8 +243,6 @@ def test_regressor_partial_fit(): assert hasattr(reg, 'standard_coef_') -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') @pytest.mark.parametrize( 'loss', ("epsilon_insensitive", "squared_epsilon_insensitive")) diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index ffbd844b902f2..6cdd538ca9247 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -43,8 +43,6 @@ def predict(self, X): return np.sign(self.project(X)) -# 0.23. warning about tol not having its correct default value. 
-@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_perceptron_accuracy(): for data in (X, X_csr): clf = Perceptron(max_iter=100, tol=None, shuffle=False) @@ -53,8 +51,6 @@ def test_perceptron_accuracy(): assert score > 0.7 -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_perceptron_correctness(): y_bin = y.copy() y_bin[y != 1] = -1 diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index f462a1fb4a040..1d7c582c51a7d 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -24,11 +24,6 @@ from sklearn.model_selection import RandomizedSearchCV -# 0.23. warning about tol not having its correct default value. -pytestmark = pytest.mark.filterwarnings( - "ignore:max_iter and tol parameters have been") - - def _update_kwargs(kwargs): if "random_state" not in kwargs: kwargs["random_state"] = 42 diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index aaf4f497f1585..c72ac0c1b7a14 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -1098,8 +1098,6 @@ def test_learning_curve_incremental_learning_unsupervised(): np.linspace(0.1, 1.0, 10)) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_learning_curve_batch_and_incremental_learning_are_equal(): X, y = make_classification(n_samples=30, n_features=1, n_informative=1, n_redundant=0, n_classes=2, @@ -1167,8 +1165,6 @@ def test_learning_curve_with_boolean_indices(): np.linspace(0.1, 1.0, 10)) -# 0.23. warning about tol not having its correct default value. 
-@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_learning_curve_with_shuffle(): # Following test case was designed this way to verify the code # changes made in pull request: #7506. @@ -1411,7 +1407,6 @@ def test_cross_val_predict_with_method(): LogisticRegression(solver="liblinear")) -@pytest.mark.filterwarnings('ignore: max_iter and tol parameters') def test_cross_val_predict_method_checking(): # Regression test for issue #9639. Tests that cross_val_predict does not # check estimator methods (e.g. predict_proba) before fitting diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index ef0aa888f2ab9..33eb5da939725 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -76,8 +76,6 @@ def test_ovr_fit_predict(): assert np.mean(iris.target == pred) > 0.65 -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_ovr_partial_fit(): # Test if partial_fit is working as intended X, y = shuffle(iris.data, iris.target, random_state=0) @@ -602,8 +600,6 @@ def test_ovo_gridsearch(): assert best_C in Cs -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_ovo_ties(): # Test that ties are broken using the decision function, # not defaulting to the smallest label @@ -629,8 +625,6 @@ def test_ovo_ties(): assert ovo_prediction[0] == normalized_confidences[0].argmax() -# 0.23. warning about tol not having its correct default value. 
-@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_ovo_ties2(): # test that ties can not only be won by the first two labels X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]]) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index cd87ad3fc863d..6256f72a4b0b3 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -50,8 +50,6 @@ def test_multi_target_regression(): assert_almost_equal(references, y_pred) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_multi_target_regression_partial_fit(): X, y = datasets.make_regression(n_targets=3) X_train, y_train = X[:50], y[:50] @@ -113,8 +111,6 @@ def test_multi_target_sample_weights_api(): rgr.fit(X, y, w) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_multi_target_sample_weight_partial_fit(): # weighted regressor X = [[1, 2, 3], [4, 5, 6]] @@ -219,8 +215,6 @@ def custom_scorer(estimator, X, y): multi_target_linear.predict_proba(X) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_multi_output_classification_partial_fit(): # test if multi_target initializes correctly with base estimator and fit # assert predictions work as expected for predict @@ -252,8 +246,6 @@ def test_multi_output_classification_partial_fit(): assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i]) -# 0.23. warning about tol not having its correct default value. 
-@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_multi_output_classification_partial_fit_no_first_classes_exception(): sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5) multi_target_linear = MultiOutputClassifier(sgd_linear_clf) @@ -368,8 +360,6 @@ def test_multi_output_classification_sample_weights(): assert_almost_equal(clf.predict(X_test), clf_w.predict(X_test)) -# 0.23. warning about tol not having its correct default value. -@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') def test_multi_output_classification_partial_fit_sample_weights(): # weighted classifier Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]] From 53043436875d35ab7acbda488876c6b5b91c2ad9 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 14:58:33 -0500 Subject: [PATCH 07/23] changed default of copy in quantil_transform --- sklearn/preprocessing/_data.py | 23 ++--------------------- sklearn/preprocessing/tests/test_data.py | 14 -------------- 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index ef8b9c6db9e3b..a9fada3345348 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -2543,7 +2543,7 @@ def quantile_transform(X, axis=0, n_quantiles=1000, ignore_implicit_zeros=False, subsample=int(1e5), random_state=None, - copy="warn"): + copy=True): """Transform features using quantiles information. This method transforms the features to follow a uniform or a normal @@ -2601,19 +2601,11 @@ def quantile_transform(X, axis=0, n_quantiles=1000, by np.random. Note that this is used by subsampling and smoothing noise. - copy : boolean, optional, (default="warn") + copy : boolean, optional, (default=True) Set to False to perform inplace transformation and avoid a copy (if the input is already a numpy array). If True, a copy of `X` is transformed, leaving the original `X` unchanged - .. 
deprecated:: 0.21 - The default value of parameter `copy` will be changed from False - to True in 0.23. The current default of False is being changed to - make it more consistent with the default `copy` values of other - functions in :mod:`sklearn.preprocessing`. Furthermore, the - current default of False may have unexpected side effects by - modifying the value of `X` inplace - Returns ------- Xt : ndarray or sparse matrix, shape (n_samples, n_features) @@ -2649,17 +2641,6 @@ def quantile_transform(X, axis=0, n_quantiles=1000, see :ref:`examples/preprocessing/plot_all_scaling.py `. """ - if copy == "warn": - warnings.warn("The default value of `copy` will change from False to " - "True in 0.23 in order to make it more consistent with " - "the default `copy` values of other functions in " - ":mod:`sklearn.preprocessing` and prevent " - "unexpected side effects by modifying the value of `X` " - "inplace. To avoid inplace modifications of `X`, it is " - "recommended to explicitly set `copy=True`", - FutureWarning) - copy = False - n = QuantileTransformer(n_quantiles=n_quantiles, output_distribution=output_distribution, subsample=subsample, diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 060719200fa99..a67c101dec499 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -1453,7 +1453,6 @@ def test_quantile_transform_sparse_toy(): assert_array_almost_equal(X.toarray(), X_trans_inv.toarray()) -@pytest.mark.filterwarnings("ignore: The default value of `copy`") # 0.23 def test_quantile_transform_axis1(): X = np.array([[0, 25, 50, 75, 100], [2, 4, 6, 8, 10], @@ -1533,18 +1532,6 @@ def test_quantile_transform_nan(): assert not np.isnan(transformer.quantiles_[:, 1:]).any() -def test_deprecated_quantile_transform_copy(): - future_message = ("The default value of `copy` will change from False to " - "True in 0.23 in order to make it more consistent with " - "the default 
`copy` values of other functions in " - ":mod:`sklearn.preprocessing` and prevent " - "unexpected side effects by modifying the value of `X` " - "inplace. To avoid inplace modifications of `X`, it is " - "recommended to explicitly set `copy=True`") - assert_warns_message(FutureWarning, future_message, quantile_transform, - np.array([[0, 1], [0, 0.5], [1, 0]])) - - def test_robust_scaler_invalid_range(): for range_ in [ (-1, 90), @@ -2163,7 +2150,6 @@ def test_fit_cold_start(): scaler.fit_transform(X_2d) -@pytest.mark.filterwarnings("ignore: The default value of `copy`") # 0.23 def test_quantile_transform_valid_axis(): X = np.array([[0, 25, 50, 75, 100], [2, 4, 6, 8, 10], From 226db87474680ee9ec6d2340232be0989029c7ed Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:00:49 -0500 Subject: [PATCH 08/23] removed six.py --- sklearn/externals/six.py | 583 --------------------------------------- 1 file changed, 583 deletions(-) delete mode 100644 sklearn/externals/six.py diff --git a/sklearn/externals/six.py b/sklearn/externals/six.py deleted file mode 100644 index 26d95f7df9abc..0000000000000 --- a/sklearn/externals/six.py +++ /dev/null @@ -1,583 +0,0 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2013 Benjamin Peterson -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import operator -import sys -import types - -import warnings -warnings.warn("The module is deprecated in version 0.21 and will be removed " - "in version 0.23 since we've dropped support for Python 2.7. " - "Please rely on the official version of six " - "(https://pypi.org/project/six/).", FutureWarning) - -__author__ = "Benjamin Peterson " -__version__ = "1.4.1" - - -# Useful for very coarse version differentiation. -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). 
- class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) - else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X - - -def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc - - -def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] - - -class _LazyDescr(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, tp): - result = self._resolve() - setattr(obj, self.name, result) - # This is a bit ugly, but it avoids running this again. - delattr(tp, self.name) - return result - - -class MovedModule(_LazyDescr): - - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old - - def _resolve(self): - return _import_module(self.mod) - - -class MovedAttribute(_LazyDescr): - - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name - else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr - - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) - - - -class _MovedItems(types.ModuleType): - """Lazy loading of moved objects""" - - -_moved_attributes = [ - MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), - MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), - MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), - MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), - MovedAttribute("map", "itertools", "builtins", "imap", "map"), - 
MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("reload_module", "__builtin__", "imp", "reload"), - MovedAttribute("reduce", "__builtin__", "functools"), - MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("UserString", "UserString", "collections"), - MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), - MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), - - MovedModule("builtins", "__builtin__"), - MovedModule("configparser", "ConfigParser"), - MovedModule("copyreg", "copy_reg"), - MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), - MovedModule("http_cookies", "Cookie", "http.cookies"), - MovedModule("html_entities", "htmlentitydefs", "html.entities"), - MovedModule("html_parser", "HTMLParser", "html.parser"), - MovedModule("http_client", "httplib", "http.client"), - MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), - MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), - MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), - MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), - MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("queue", "Queue"), - MovedModule("reprlib", "repr"), - MovedModule("socketserver", "SocketServer"), - MovedModule("tkinter", "Tkinter"), - MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), - MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), - MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), - MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), - MovedModule("tkinter_tix", "Tix", "tkinter.tix"), - MovedModule("tkinter_constants", "Tkconstants", 
"tkinter.constants"), - MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), - MovedModule("tkinter_colorchooser", "tkColorChooser", - "tkinter.colorchooser"), - MovedModule("tkinter_commondialog", "tkCommonDialog", - "tkinter.commondialog"), - MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), - MovedModule("tkinter_font", "tkFont", "tkinter.font"), - MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), - MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", - "tkinter.simpledialog"), - MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), - MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), - MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), - MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), - MovedModule("winreg", "_winreg"), -] -for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) -del attr - -moves = sys.modules[__name__ + ".moves"] = _MovedItems(__name__ + ".moves") - - - -class Module_six_moves_urllib_parse(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_parse""" - - -_urllib_parse_moved_attributes = [ - MovedAttribute("ParseResult", "urlparse", "urllib.parse"), - MovedAttribute("parse_qs", "urlparse", "urllib.parse"), - MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), - MovedAttribute("urldefrag", "urlparse", "urllib.parse"), - MovedAttribute("urljoin", "urlparse", "urllib.parse"), - MovedAttribute("urlparse", "urlparse", "urllib.parse"), - MovedAttribute("urlsplit", "urlparse", "urllib.parse"), - MovedAttribute("urlunparse", "urlparse", "urllib.parse"), - MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), - MovedAttribute("quote", "urllib", "urllib.parse"), - MovedAttribute("quote_plus", "urllib", "urllib.parse"), - MovedAttribute("unquote", "urllib", "urllib.parse"), - MovedAttribute("unquote_plus", "urllib", "urllib.parse"), - 
MovedAttribute("urlencode", "urllib", "urllib.parse"), -] -for attr in _urllib_parse_moved_attributes: - setattr(Module_six_moves_urllib_parse, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse") -sys.modules[__name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib.parse") - - -class Module_six_moves_urllib_error(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_error""" - - -_urllib_error_moved_attributes = [ - MovedAttribute("URLError", "urllib2", "urllib.error"), - MovedAttribute("HTTPError", "urllib2", "urllib.error"), - MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), -] -for attr in _urllib_error_moved_attributes: - setattr(Module_six_moves_urllib_error, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib_error") -sys.modules[__name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib.error") - - -class Module_six_moves_urllib_request(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_request""" - - -_urllib_request_moved_attributes = [ - MovedAttribute("urlopen", "urllib2", "urllib.request"), - MovedAttribute("install_opener", "urllib2", "urllib.request"), - MovedAttribute("build_opener", "urllib2", "urllib.request"), - MovedAttribute("pathname2url", "urllib", "urllib.request"), - MovedAttribute("url2pathname", "urllib", "urllib.request"), - MovedAttribute("getproxies", "urllib", "urllib.request"), - MovedAttribute("Request", "urllib2", "urllib.request"), - MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), - MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), - MovedAttribute("ProxyHandler", 
"urllib2", "urllib.request"), - MovedAttribute("BaseHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), - MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), - MovedAttribute("FileHandler", "urllib2", "urllib.request"), - MovedAttribute("FTPHandler", "urllib2", "urllib.request"), - MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), - MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), - MovedAttribute("urlretrieve", "urllib", "urllib.request"), - MovedAttribute("urlcleanup", "urllib", "urllib.request"), - MovedAttribute("URLopener", "urllib", "urllib.request"), - MovedAttribute("FancyURLopener", "urllib", "urllib.request"), -] -for attr in _urllib_request_moved_attributes: - setattr(Module_six_moves_urllib_request, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib_request") -sys.modules[__name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib.request") - - -class Module_six_moves_urllib_response(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_response""" - - -_urllib_response_moved_attributes = [ - MovedAttribute("addbase", "urllib", "urllib.response"), - MovedAttribute("addclosehook", 
"urllib", "urllib.response"), - MovedAttribute("addinfo", "urllib", "urllib.response"), - MovedAttribute("addinfourl", "urllib", "urllib.response"), -] -for attr in _urllib_response_moved_attributes: - setattr(Module_six_moves_urllib_response, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib_response") -sys.modules[__name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib.response") - - -class Module_six_moves_urllib_robotparser(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_robotparser""" - - -_urllib_robotparser_moved_attributes = [ - MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), -] -for attr in _urllib_robotparser_moved_attributes: - setattr(Module_six_moves_urllib_robotparser, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib_robotparser") -sys.modules[__name__ + ".moves.urllib.robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser") - - -class Module_six_moves_urllib(types.ModuleType): - """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" - parse = sys.modules[__name__ + ".moves.urllib_parse"] - error = sys.modules[__name__ + ".moves.urllib_error"] - request = sys.modules[__name__ + ".moves.urllib_request"] - response = sys.modules[__name__ + ".moves.urllib_response"] - robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"] - - -sys.modules[__name__ + ".moves.urllib"] = Module_six_moves_urllib(__name__ + ".moves.urllib") - - -def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) - - -def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: - try: - del moves.__dict__[name] - except KeyError: - raise 
AttributeError("no such move, %r" % (name,)) - - -if PY3: - _meth_func = "__func__" - _meth_self = "__self__" - - _func_closure = "__closure__" - _func_code = "__code__" - _func_defaults = "__defaults__" - _func_globals = "__globals__" - - _iterkeys = "keys" - _itervalues = "values" - _iteritems = "items" - _iterlists = "lists" -else: - _meth_func = "im_func" - _meth_self = "im_self" - - _func_closure = "func_closure" - _func_code = "func_code" - _func_defaults = "func_defaults" - _func_globals = "func_globals" - - _iterkeys = "iterkeys" - _itervalues = "itervalues" - _iteritems = "iteritems" - _iterlists = "iterlists" - - -try: - advance_iterator = next -except NameError: - def advance_iterator(it): - return it.next() -next = advance_iterator - - -try: - callable = callable -except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - - -if PY3: - def get_unbound_function(unbound): - return unbound - - create_bound_method = types.MethodType - - Iterator = object -else: - def get_unbound_function(unbound): - return unbound.im_func - - def create_bound_method(func, obj): - return types.MethodType(func, obj, obj.__class__) - - class Iterator(object): - - def next(self): - return type(self).__next__(self) - - callable = callable -_add_doc(get_unbound_function, - """Get the function out of a possibly unbound function""") - - -get_method_function = operator.attrgetter(_meth_func) -get_method_self = operator.attrgetter(_meth_self) -get_function_closure = operator.attrgetter(_func_closure) -get_function_code = operator.attrgetter(_func_code) -get_function_defaults = operator.attrgetter(_func_defaults) -get_function_globals = operator.attrgetter(_func_globals) - - -def iterkeys(d, **kw): - """Return an iterator over the keys of a dictionary.""" - return iter(getattr(d, _iterkeys)(**kw)) - -def itervalues(d, **kw): - """Return an iterator over the values of a dictionary.""" - return iter(getattr(d, _itervalues)(**kw)) - 
-def iteritems(d, **kw): - """Return an iterator over the (key, value) pairs of a dictionary.""" - return iter(getattr(d, _iteritems)(**kw)) - -def iterlists(d, **kw): - """Return an iterator over the (key, [values]) pairs of a dictionary.""" - return iter(getattr(d, _iterlists)(**kw)) - - -if PY3: - def b(s): - return s.encode("latin-1") - def u(s): - return s - unichr = chr - if sys.version_info[1] <= 1: - def int2byte(i): - return bytes((i,)) - else: - # This is about 2x faster than the implementation above on 3.2+ - int2byte = operator.methodcaller("to_bytes", 1, "big") - byte2int = operator.itemgetter(0) - indexbytes = operator.getitem - iterbytes = iter - import io - StringIO = io.StringIO - BytesIO = io.BytesIO -else: - def b(s): - return s - def u(s): - return unicode(s, "unicode_escape") - unichr = unichr - int2byte = chr - def byte2int(bs): - return ord(bs[0]) - def indexbytes(buf, i): - return ord(buf[i]) - def iterbytes(buf): - return (ord(byte) for byte in buf) - import StringIO - StringIO = BytesIO = StringIO.StringIO -_add_doc(b, """Byte literal""") -_add_doc(u, """Text literal""") - - -if PY3: - import builtins - exec_ = getattr(builtins, "exec") - - - def reraise(tp, value, tb=None): - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - - - print_ = getattr(builtins, "print") - del builtins - -else: - def exec_(_code_, _globs_=None, _locs_=None): - """Execute code in a namespace.""" - if _globs_ is None: - frame = sys._getframe(1) - _globs_ = frame.f_globals - if _locs_ is None: - _locs_ = frame.f_locals - del frame - elif _locs_ is None: - _locs_ = _globs_ - exec("""exec _code_ in _globs_, _locs_""") - - - exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb -""") - - - def print_(*args, **kwargs): - """The new-style print function.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return - def write(data): - if not isinstance(data, basestring): - data = str(data) - fp.write(data) - want_unicode 
= False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) - -_add_doc(reraise, """Reraise an exception.""") - - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - return meta("NewBase", bases, {}) - -def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - for slots_var in orig_vars.get('__slots__', ()): - orig_vars.pop(slots_var) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper From 53f9eccacfd7575ca5915011ada0a2079aa7a393 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:05:48 -0500 Subject: [PATCH 09/23] nmf default value of init param --- sklearn/decomposition/_nmf.py | 14 ++------------ sklearn/decomposition/tests/test_nmf.py | 4 ---- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 9d335eb775d8b..71cab4da9a725 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -842,7 +842,7 @@ def _fit_multiplicative_update(X, W, H, beta_loss='frobenius', def 
non_negative_factorization(X, W=None, H=None, n_components=None, - init='warn', update_H=True, solver='cd', + init=None, update_H=True, solver='cd', beta_loss='frobenius', tol=1e-4, max_iter=200, alpha=0., l1_ratio=0., regularization=None, random_state=None, @@ -891,10 +891,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, init : None | 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar' | 'custom' Method used to initialize the procedure. - Default: 'random'. - - The default value will change from 'random' to None in version 0.23 - to make it consistent with decomposition.NMF. + Default: None. Valid options: @@ -1028,13 +1025,6 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % tol) - if init == "warn": - if n_components < n_features: - warnings.warn("The default value of init will change from " - "random to None in 0.23 to make it consistent " - "with decomposition.NMF.", FutureWarning) - init = "random" - # check W and H, or initialize them if init == 'custom' and update_H: _check_init(H, (n_components, n_features), "NMF (input H)") diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index d98ad551513e7..4fd21ffbf5b1d 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -224,10 +224,6 @@ def test_non_negative_factorization_checking(): A = np.ones((2, 2)) # Test parameters checking is public function nnmf = non_negative_factorization - msg = ("The default value of init will change from " - "random to None in 0.23 to make it consistent " - "with decomposition.NMF.") - assert_warns_message(FutureWarning, msg, nnmf, A, A, A, np.int64(1)) msg = ("Number of components must be a positive integer; " "got (n_components=1.5)") assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5, 'random') From d80940ae9e7e14e7257b14c08b8397de8294e304 Mon Sep 17 00:00:00 
2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:16:14 -0500 Subject: [PATCH 10/23] raise error instead of warning in LinearDiscriminantAnalysis --- sklearn/discriminant_analysis.py | 19 ++++--------------- sklearn/tests/test_discriminant_analysis.py | 18 +++++------------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 4492d0868994d..1495d00620911 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -423,7 +423,6 @@ def fit(self, X, y): y : array, shape (n_samples,) Target values. """ - # FIXME: Future warning to be removed in 0.23 X, y = check_X_y(X, y, ensure_min_samples=2, estimator=self, dtype=[np.float64, np.float32]) self.classes_ = unique_labels(y) @@ -455,21 +454,11 @@ def fit(self, X, y): self._max_components = max_components else: if self.n_components > max_components: - warnings.warn( + raise ValueError( "n_components cannot be larger than min(n_features, " - "n_classes - 1). Using min(n_features, " - "n_classes - 1) = min(%d, %d - 1) = %d components." - % (X.shape[1], len(self.classes_), max_components), - ChangedBehaviorWarning) - future_msg = ("In version 0.23, setting n_components > min(" - "n_features, n_classes - 1) will raise a " - "ValueError. You should set n_components to None" - " (default), or a value smaller or equal to " - "min(n_features, n_classes - 1).") - warnings.warn(future_msg, FutureWarning) - self._max_components = max_components - else: - self._max_components = self.n_components + "n_classes - 1)." 
+ ) + self._max_components = self.n_components if self.solver == 'svd': if self.shrinkage is not None: diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 7b3e94bea793c..24c207703040b 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -332,7 +332,6 @@ def test_lda_store_covariance(): @pytest.mark.parametrize('n_features', [3, 5]) @pytest.mark.parametrize('n_classes', [5, 3]) def test_lda_dimension_warning(n_classes, n_features): - # FIXME: Future warning to be removed in 0.23 rng = check_random_state(0) n_samples = 10 X = rng.randn(n_samples, n_features) @@ -348,22 +347,15 @@ def test_lda_dimension_warning(n_classes, n_features): for n_components in [max_components + 1, max(n_features, n_classes - 1) + 1]: - # if n_components > min(n_classes - 1, n_features), raise warning + # if n_components > min(n_classes - 1, n_features), raise error. # We test one unit higher than max_components, and then something # larger than both n_features and n_classes - 1 to ensure the test # works for any value of n_component lda = LinearDiscriminantAnalysis(n_components=n_components) - msg = ("n_components cannot be larger than min(n_features, " - "n_classes - 1). Using min(n_features, " - "n_classes - 1) = min(%d, %d - 1) = %d components." % - (n_features, n_classes, max_components)) - assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y) - future_msg = ("In version 0.23, setting n_components > min(" - "n_features, n_classes - 1) will raise a " - "ValueError. 
You should set n_components to None" - " (default), or a value smaller or equal to " - "min(n_features, n_classes - 1).") - assert_warns_message(FutureWarning, future_msg, lda.fit, X, y) + msg = ("n_components cannot be larger than min\(n_features, " + "n_classes - 1\).") + with pytest.raises(ValueError, match=msg): + lda.fit(X, y) @pytest.mark.parametrize("data_type, expected_type", [ From 16b3c9c6737ec8ebd1560bbef58b3184f4aaf5ab Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:19:12 -0500 Subject: [PATCH 11/23] removed label param in hamming_loss --- sklearn/metrics/_classification.py | 19 +------------------ sklearn/metrics/tests/test_classification.py | 5 ----- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 666f110aee6fc..0c8fca872c6cb 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1986,7 +1986,7 @@ class 2 1.00 0.67 0.80 3 return report -def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): +def hamming_loss(y_true, y_pred, sample_weight=None): """Compute the average Hamming loss. The Hamming loss is the fraction of labels that are incorrectly predicted. @@ -2001,17 +2001,6 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - labels : array, shape = [n_labels], optional (default='deprecated') - Integer array of labels. If not provided, labels will be inferred - from y_true and y_pred. - - .. versionadded:: 0.18 - .. deprecated:: 0.21 - This parameter ``labels`` is deprecated in version 0.21 and will - be removed in version 0.23. Hamming loss uses ``y_true.shape[1]`` - for the number of labels when y_true is binary label indicators, - so it is unnecessary for the user to specify. - sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
@@ -2071,12 +2060,6 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): y_type, y_true, y_pred = _check_targets(y_true, y_pred) check_consistent_length(y_true, y_pred, sample_weight) - if labels is not None: - warnings.warn("The labels parameter is unused. It was" - " deprecated in version 0.21 and" - " will be removed in version 0.23", - FutureWarning) - if sample_weight is None: weight_average = 1. else: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 66ea486f955b7..4c1db4b55bb16 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1176,11 +1176,6 @@ def test_multilabel_hamming_loss(): assert hamming_loss(y1, np.zeros_like(y1), sample_weight=w) == 2. / 3 # sp_hamming only works with 1-D arrays assert hamming_loss(y1[0], y2[0]) == sp_hamming(y1[0], y2[0]) - assert_warns_message(FutureWarning, - "The labels parameter is unused. It was" - " deprecated in version 0.21 and" - " will be removed in version 0.23", - hamming_loss, y1, y2, labels=[0, 1]) def test_jaccard_score_validation(): From 7af6207657f47f3fc2ee339b45d187897d4ab576 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:21:56 -0500 Subject: [PATCH 12/23] updated method parameter of power_transform --- sklearn/preprocessing/_data.py | 15 ++------------- sklearn/preprocessing/tests/test_data.py | 18 ------------------ 2 files changed, 2 insertions(+), 31 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index a9fada3345348..2d34cf66d511a 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3005,7 +3005,7 @@ def _more_tags(self): return {'allow_nan': True} -def power_transform(X, method='warn', standardize=True, copy=True): +def power_transform(X, method='yeo-johnson', standardize=True, copy=True): """ Power transforms are a family of parametric, monotonic transformations that are 
applied to make data more Gaussian-like. This is useful for @@ -3029,16 +3029,12 @@ def power_transform(X, method='warn', standardize=True, copy=True): X : array-like, shape (n_samples, n_features) The data to be transformed using a power transformation. - method : str + method : {'yeo-johnson', 'box-cox'}, default='yeo-johnson' The power transform method. Available methods are: - 'yeo-johnson' [1]_, works with positive and negative values - 'box-cox' [2]_, only works with strictly positive values - The default method will be changed from 'box-cox' to 'yeo-johnson' - in version 0.23. To suppress the FutureWarning, explicitly set the - parameter. - standardize : boolean, default=True Set to True to apply zero-mean, unit-variance normalization to the transformed output. @@ -3089,12 +3085,5 @@ def power_transform(X, method='warn', standardize=True, copy=True): .. [2] G.E.P. Box and D.R. Cox, "An Analysis of Transformations", Journal of the Royal Statistical Society B, 26, 211-252 (1964). """ - if method == 'warn': - warnings.warn("The default value of 'method' will change from " - "'box-cox' to 'yeo-johnson' in version 0.23. 
Set " - "the 'method' argument explicitly to silence this " - "warning in the meantime.", - FutureWarning) - method = 'box-cox' pt = PowerTransformer(method=method, standardize=standardize, copy=copy) return pt.fit_transform(X) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index a67c101dec499..9a8e31d468f1c 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2452,21 +2452,3 @@ def test_power_transformer_copy_False(method, standardize): X_inv_trans = pt.inverse_transform(X_trans) assert X_trans is X_inv_trans - - -def test_power_transform_default_method(): - X = np.abs(X_2d) - - future_warning_message = ( - "The default value of 'method' " - "will change from 'box-cox'" - ) - assert_warns_message(FutureWarning, future_warning_message, - power_transform, X) - - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - X_trans_default = power_transform(X) - - X_trans_boxcox = power_transform(X, method='box-cox') - assert_array_equal(X_trans_boxcox, X_trans_default) From 808ab057f1dfbbde80111ba4fdb2580e6bd04040 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:23:21 -0500 Subject: [PATCH 13/23] pep8 --- sklearn/tests/test_discriminant_analysis.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 24c207703040b..dcd4009a47a2d 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -352,8 +352,7 @@ def test_lda_dimension_warning(n_classes, n_features): # larger than both n_features and n_classes - 1 to ensure the test # works for any value of n_component lda = LinearDiscriminantAnalysis(n_components=n_components) - msg = ("n_components cannot be larger than min\(n_features, " - "n_classes - 1\).") + msg = "n_components cannot be larger than " with pytest.raises(ValueError, 
match=msg): lda.fit(X, y) From 0d574a0c0c63b722a69b531a21ba61c924a1e1dd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:39:19 -0500 Subject: [PATCH 14/23] changed default value of min_impurity_split --- sklearn/tree/_classes.py | 38 ++++++++++++++++----------------- sklearn/tree/tests/test_tree.py | 4 ++-- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index ea43716e20ae6..aa93876ff32dc 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -293,19 +293,17 @@ def fit(self, X, y, sample_weight=None, check_input=True, min_weight_leaf = (self.min_weight_fraction_leaf * np.sum(sample_weight)) - if self.min_impurity_split is not None: + min_impurity_split = self.min_impurity_split + if min_impurity_split != 0: warnings.warn("The min_impurity_split parameter is deprecated. " - "Its default value will change from 1e-7 to 0 in " + "Its default value has changed from 1e-7 to 0 in " "version 0.23, and it will be removed in 0.25. " "Use the min_impurity_decrease parameter instead.", FutureWarning) - min_impurity_split = self.min_impurity_split - else: - min_impurity_split = 1e-7 - if min_impurity_split < 0.: - raise ValueError("min_impurity_split must be greater than " - "or equal to 0") + if min_impurity_split < 0.: + raise ValueError("min_impurity_split must be greater than " + "or equal to 0") if self.min_impurity_decrease < 0.: raise ValueError("min_impurity_decrease must be greater than " @@ -679,14 +677,14 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): .. versionadded:: 0.19 - min_impurity_split : float, default=1e-7 + min_impurity_split : float, default=0 Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. 
The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. class_weight : dict, list of dicts, "balanced" or None, default=None @@ -812,7 +810,7 @@ def __init__(self, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0., - min_impurity_split=None, + min_impurity_split=0, class_weight=None, presort='deprecated', ccp_alpha=0.0): @@ -1061,14 +1059,14 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. presort : deprecated, default='deprecated' @@ -1165,7 +1163,7 @@ def __init__(self, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0., - min_impurity_split=None, + min_impurity_split=0, presort='deprecated', ccp_alpha=0.0): super().__init__( @@ -1349,14 +1347,14 @@ class ExtraTreeClassifier(DecisionTreeClassifier): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. 
The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. class_weight : dict, list of dicts, "balanced" or None, default=None @@ -1448,7 +1446,7 @@ def __init__(self, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0., - min_impurity_split=None, + min_impurity_split=0, class_weight=None, ccp_alpha=0.0): super().__init__( @@ -1573,14 +1571,14 @@ class ExtraTreeRegressor(DecisionTreeRegressor): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. 
max_leaf_nodes : int or None, optional (default=None) @@ -1657,7 +1655,7 @@ def __init__(self, max_features="auto", random_state=None, min_impurity_decrease=0., - min_impurity_split=None, + min_impurity_split=0, max_leaf_nodes=None, ccp_alpha=0.0): super().__init__( diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index dcd9d4c01a8ec..0488a69efc37b 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -802,8 +802,8 @@ def test_min_impurity_split(): # impurity 1e-7 est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, random_state=0) - assert est.min_impurity_split is None, ( - "Failed, min_impurity_split = {0} > 1e-7".format( + assert est.min_impurity_split == 0, ( + "Failed, min_impurity_split = {0} != 0".format( est.min_impurity_split)) try: assert_warns(FutureWarning, est.fit, X, y) From 5a4c2d50ed383f826f4705c6e264bc044dc33ef4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 5 Dec 2019 15:41:31 -0500 Subject: [PATCH 15/23] removed assert_false and assert_true --- sklearn/utils/_testing.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 4e4e6043eae3d..c40e2bc84d8f9 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -53,8 +53,8 @@ __all__ = ["assert_equal", "assert_not_equal", "assert_raises", - "assert_raises_regexp", "assert_true", - "assert_false", "assert_almost_equal", "assert_array_equal", + "assert_raises_regexp", + "assert_almost_equal", "assert_array_equal", "assert_array_almost_equal", "assert_array_less", "assert_less", "assert_less_equal", "assert_greater", "assert_greater_equal", @@ -85,16 +85,6 @@ # the old name for now assert_raises_regexp = assert_raises_regex -deprecation_message = "'assert_true' is deprecated in version 0.21 " \ - "and will be removed in version 0.23. " \ - "Please use 'assert' instead." 
-assert_true = deprecated(deprecation_message)(_dummy.assertTrue) - -deprecation_message = "'assert_false' is deprecated in version 0.21 " \ - "and will be removed in version 0.23. " \ - "Please use 'assert' instead." -assert_false = deprecated(deprecation_message)(_dummy.assertFalse) - def assert_warns(warning_class, func, *args, **kw): """Test that a certain warning occurs. From 04ec379cbdd20e095c66d7315ff92ca0b2c21429 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 09:20:34 -0500 Subject: [PATCH 16/23] added and fixed versionchanged directives --- sklearn/decomposition/_nmf.py | 3 +++ sklearn/preprocessing/_data.py | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 71cab4da9a725..6d5509611cefd 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -912,6 +912,9 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, - 'custom': use custom matrices W and H + .. versionchanged:: 0.23 + The default value of `init` changed from 'random' to None in 0.23. + update_H : boolean, default: True Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated. diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 3486cc3daa075..cae75be2e591c 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -2606,8 +2606,8 @@ def quantile_transform(X, axis=0, n_quantiles=1000, input is already a numpy array). If True, a copy of `X` is transformed, leaving the original `X` unchanged - ..versionchnanged:: 0.22 - The default value of `copy` changed from False to True in 0.22. + ..versionchnanged:: 0.23 + The default value of `copy` changed from False to True in 0.23. 
Returns ------- @@ -3038,6 +3038,10 @@ def power_transform(X, method='yeo-johnson', standardize=True, copy=True): - 'yeo-johnson' [1]_, works with positive and negative values - 'box-cox' [2]_, only works with strictly positive values + .. versionchanged:: 0.23 + The default value of the `method` parameter changed from + 'box-cox' to 'yeo-johnson' in 0.23. + standardize : boolean, default=True Set to True to apply zero-mean, unit-variance normalization to the transformed output. From 015ad40739ad9d2067c767e2921dfa926876ba88 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 09:27:51 -0500 Subject: [PATCH 17/23] reset min_impurity_split default to None --- sklearn/tree/_classes.py | 12 +++++++----- sklearn/tree/tests/test_tree.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index aa93876ff32dc..b2e2eae4a9f52 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -294,7 +294,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, np.sum(sample_weight)) min_impurity_split = self.min_impurity_split - if min_impurity_split != 0: + if min_impurity_split is not None: warnings.warn("The min_impurity_split parameter is deprecated. " "Its default value has changed from 1e-7 to 0 in " "version 0.23, and it will be removed in 0.25. 
" @@ -304,6 +304,8 @@ def fit(self, X, y, sample_weight=None, check_input=True, if min_impurity_split < 0.: raise ValueError("min_impurity_split must be greater than " "or equal to 0") + else: + min_impurity_split = 0 if self.min_impurity_decrease < 0.: raise ValueError("min_impurity_decrease must be greater than " @@ -810,7 +812,7 @@ def __init__(self, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0., - min_impurity_split=0, + min_impurity_split=None, class_weight=None, presort='deprecated', ccp_alpha=0.0): @@ -1163,7 +1165,7 @@ def __init__(self, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0., - min_impurity_split=0, + min_impurity_split=None, presort='deprecated', ccp_alpha=0.0): super().__init__( @@ -1446,7 +1448,7 @@ def __init__(self, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0., - min_impurity_split=0, + min_impurity_split=None, class_weight=None, ccp_alpha=0.0): super().__init__( @@ -1655,7 +1657,7 @@ def __init__(self, max_features="auto", random_state=None, min_impurity_decrease=0., - min_impurity_split=0, + min_impurity_split=None, max_leaf_nodes=None, ccp_alpha=0.0): super().__init__( diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 0488a69efc37b..9f65ad7f68e83 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -802,8 +802,8 @@ def test_min_impurity_split(): # impurity 1e-7 est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, random_state=0) - assert est.min_impurity_split == 0, ( - "Failed, min_impurity_split = {0} != 0".format( + assert est.min_impurity_split is None, ( + "Failed, min_impurity_split = {0} != None".format( est.min_impurity_split)) try: assert_warns(FutureWarning, est.fit, X, y) From e6443a50cfee1e23ac8069b70c63b23f76decac5 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 12:17:20 -0500 Subject: [PATCH 18/23] fixed LDA issue --- sklearn/utils/estimator_checks.py | 3 --- 1 file changed, 3 deletions(-) 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 30c668237b371..b8471daf5deab 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -479,9 +479,6 @@ def _set_checking_parameters(estimator): # K-Means estimator.set_params(n_init=2) - if hasattr(estimator, "n_components"): - estimator.n_components = 2 - if name == 'TruncatedSVD': # TruncatedSVD doesn't run with n_components = n_features # This is ugly :-/ From 09bf4e55a4bfbda707fb76c943cc598be851a500 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 12:24:14 -0500 Subject: [PATCH 19/23] fixed some test --- sklearn/metrics/tests/test_common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 991af61537012..331bcf197dccb 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -351,8 +351,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "roc_curve", "precision_recall_curve", - "hamming_loss", - "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", "jaccard_score", From 1fae94f0faa96b5fa920d4640f0448a9892f2e8e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 12:50:53 -0500 Subject: [PATCH 20/23] more docstrings updates --- sklearn/ensemble/_forest.py | 20 ++++++++++---------- sklearn/ensemble/_gb.py | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index eba59c232531b..7e88f0c2f189a 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -935,14 +935,14 @@ class RandomForestClassifier(ForestClassifier): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. 
deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. @@ -1253,14 +1253,14 @@ class RandomForestRegressor(ForestRegressor): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. bootstrap : boolean, optional (default=True) @@ -1530,14 +1530,14 @@ class ExtraTreesClassifier(ForestClassifier): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. bootstrap : boolean, optional (default=False) @@ -1840,14 +1840,14 @@ class ExtraTreesRegressor(ForestRegressor): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. 
deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. bootstrap : boolean, optional (default=False) @@ -2078,14 +2078,14 @@ class RandomTreesEmbedding(BaseForest): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. sparse_output : bool, optional (default=True) diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index c3971e019a088..667a526e486a9 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -868,14 +868,14 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. 
init : estimator or 'zero', optional (default=None) @@ -1340,14 +1340,14 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): .. versionadded:: 0.19 - min_impurity_split : float, (default=1e-7) + min_impurity_split : float, (default=0) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of - ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 0.25. Use ``min_impurity_decrease`` instead. init : estimator or 'zero', optional (default=None) From 43fea84562675b541c682fe50a5732b49962643c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 12:54:28 -0500 Subject: [PATCH 21/23] set min_impurity_decrease for test to pass --- sklearn/ensemble/tests/test_gradient_boosting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 5fe9dee573d1d..a28c69d0f7cc5 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1170,9 +1170,10 @@ def test_non_uniform_weights_toy_edge_case_clf(): def check_sparse_input(EstimatorClass, X, X_sparse, y): dense = EstimatorClass(n_estimators=10, random_state=0, - max_depth=2).fit(X, y) + max_depth=2, min_impurity_decrease=1e-7).fit(X, y) sparse = EstimatorClass(n_estimators=10, random_state=0, - max_depth=2).fit(X_sparse, y) + max_depth=2, + min_impurity_decrease=1e-7).fit(X_sparse, y) assert_array_almost_equal(sparse.apply(X), dense.apply(X)) assert_array_almost_equal(sparse.predict(X), dense.predict(X)) From 7cd20a07db5b26492345f3abc2eaab8e16f29821 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 14:30:30 -0500 
Subject: [PATCH 22/23] update docstring example --- sklearn/tree/_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index b2e2eae4a9f52..4eb02464e786f 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -1645,7 +1645,7 @@ class ExtraTreeRegressor(DecisionTreeRegressor): >>> reg = BaggingRegressor(extra_tree, random_state=0).fit( ... X_train, y_train) >>> reg.score(X_test, y_test) - 0.7823... + 0.7788... """ def __init__(self, criterion="mse", From 7fb08724e7aa53342ac16d0c0a19e77ba58f938d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 9 Dec 2019 14:50:10 -0500 Subject: [PATCH 23/23] fixed doctest --- doc/modules/ensemble.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index c2897ed518509..8a414e5371511 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1323,7 +1323,7 @@ computationally expensive. StackingRegressor(...) >>> print('R2 score: {:.2f}' ... .format(multi_layer_regressor.score(X_test, y_test))) - R2 score: 0.82 + R2 score: 0.83 .. topic:: References