From 4a61a3965e69ca2b30192192d5b39fa35fa31a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 9 Aug 2023 15:35:01 +0200 Subject: [PATCH 1/6] MNT Adjust code after NEP 51 numpy scalar formatting changes --- sklearn/dummy.py | 2 +- sklearn/impute/tests/test_impute.py | 4 ++-- sklearn/preprocessing/_encoders.py | 2 +- sklearn/utils/class_weight.py | 3 ++- sklearn/utils/validation.py | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 1db664826f5c9..63318b07ce580 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -227,7 +227,7 @@ def fit(self, X, y, sample_weight=None): "The constant target value must be present in " "the training data. You provided constant={}. " "Possible values are: {}.".format( - self.constant, list(self.classes_[k]) + self.constant, self.classes_[k].tolist() ) ) raise ValueError(err_msg) diff --git a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py index 936847e55e324..9d02eb7722cfc 100644 --- a/sklearn/impute/tests/test_impute.py +++ b/sklearn/impute/tests/test_impute.py @@ -259,7 +259,7 @@ def test_imputation_median_special_cases(): @pytest.mark.parametrize("dtype", [None, object, str]) def test_imputation_mean_median_error_invalid_type(strategy, dtype): X = np.array([["a", "b", 3], [4, "e", 6], ["g", "h", 9]], dtype=dtype) - msg = "non-numeric data:\ncould not convert string to float: '" + msg = "non-numeric data:\ncould not convert string to float:" with pytest.raises(ValueError, match=msg): imputer = SimpleImputer(strategy=strategy) imputer.fit_transform(X) @@ -272,7 +272,7 @@ def test_imputation_mean_median_error_invalid_type_list_pandas(strategy, type): if type == "dataframe": pd = pytest.importorskip("pandas") X = pd.DataFrame(X) - msg = "non-numeric data:\ncould not convert string to float: '" + msg = "non-numeric data:\ncould not convert string to float:" with pytest.raises(ValueError, match=msg): imputer = SimpleImputer(strategy=strategy) imputer.fit_transform(X) diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 2c4ea4af450f2..cd89cc6cb27b1 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -774,7 +774,7 @@ def _map_drop_idx_to_infrequent(self, feature_idx, drop_idx): if infrequent_indices is not None and drop_idx in infrequent_indices: categories = self.categories_[feature_idx] raise ValueError( - f"Unable to drop category {categories[drop_idx]!r} from feature" + f"Unable to drop category {categories[drop_idx].tolist()!r} from feature" f" {feature_idx} because it is infrequent" ) return default_to_infrequent[drop_idx] diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index 0b59f63190c3b..9e170debd4556 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -74,8 +74,9 @@ def compute_class_weight(class_weight, *, classes, y): n_weighted_classes = len(classes) - len(unweighted_classes) if unweighted_classes and n_weighted_classes != len(class_weight): + unweighted_classes_user_friendly_str = np.array(unweighted_classes).tolist() raise ValueError( - f"The classes, {unweighted_classes}, are not in class_weight" + f"The classes, {unweighted_classes_user_friendly_str}, are not in class_weight" ) return weight diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 641459fd75f92..50a661714eb30 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2270,9 +2270,9 @@ def _check_pos_label_consistency(pos_label, y_true): or np.array_equal(classes, [1]) ) ): - classes_repr = ", ".join(repr(c) for c in classes) + classes_repr = str(classes.tolist()).replace('[', '{').replace(']', '}') raise ValueError( - f"y_true takes value in {{{classes_repr}}} and pos_label is not " + f"y_true takes value in {classes_repr} and pos_label is not " "specified: either make y_true take value in {0, 1} or " "{-1, 1} or pass pos_label explicitly." ) From 50c8879e9c494267d6b6f370d0ff3a2d0d4d6932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 9 Aug 2023 15:42:33 +0200 Subject: [PATCH 2/6] [scipy-dev] From e439f6d4a481e5fe9481c3fc4a9152185f961b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 9 Aug 2023 15:56:43 +0200 Subject: [PATCH 3/6] [scipy-dev] --- sklearn/preprocessing/_encoders.py | 4 ++-- sklearn/utils/class_weight.py | 3 ++- sklearn/utils/validation.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index cd89cc6cb27b1..348f1c4ae5b3d 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -774,8 +774,8 @@ def _map_drop_idx_to_infrequent(self, feature_idx, drop_idx): if infrequent_indices is not None and drop_idx in infrequent_indices: categories = self.categories_[feature_idx] raise ValueError( - f"Unable to drop category {categories[drop_idx].tolist()!r} from feature" - f" {feature_idx} because it is infrequent" + f"Unable to drop category {categories[drop_idx].tolist()!r} from" + f" feature {feature_idx} because it is infrequent" ) return default_to_infrequent[drop_idx] diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index 9e170debd4556..19e7bcb7ba17a 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -76,7 +76,8 @@ def compute_class_weight(class_weight, *, classes, y): if unweighted_classes and n_weighted_classes != len(class_weight): unweighted_classes_user_friendly_str = np.array(unweighted_classes).tolist() raise ValueError( - f"The classes, {unweighted_classes_user_friendly_str}, are not in class_weight" + f"The classes, {unweighted_classes_user_friendly_str}, are not in" + " class_weight" ) return weight diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 50a661714eb30..f1cedde640356 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2270,7 +2270,7 @@ def _check_pos_label_consistency(pos_label, y_true): or np.array_equal(classes, [1]) ) ): - classes_repr = str(classes.tolist()).replace('[', '{').replace(']', '}') + classes_repr = str(classes.tolist()).replace("[", "{").replace("]", "}") raise ValueError( f"y_true takes value in {classes_repr} and pos_label is not " "specified: either make y_true take value in {0, 1} or " From 4d17d1f579145990e5251cb8d960ee75fd05cc22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 18 Aug 2023 13:54:39 +0200 Subject: [PATCH 4/6] [scipy-dev] [azure parallel] From cba58a4fbe84e41d4f114621b6feb1fd9cf40ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 18 Aug 2023 14:37:08 +0200 Subject: [PATCH 5/6] [scipy-dev] Use .item instead of .tolist --- sklearn/preprocessing/_encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 823eb57ed2d2d..8f441907d5471 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -774,7 +774,7 @@ def _map_drop_idx_to_infrequent(self, feature_idx, drop_idx): if infrequent_indices is not None and drop_idx in infrequent_indices: categories = self.categories_[feature_idx] raise ValueError( - f"Unable to drop category {categories[drop_idx].tolist()!r} from" + f"Unable to drop category {categories[drop_idx].item()!r} from" f" feature {feature_idx} because it is infrequent" ) return default_to_infrequent[drop_idx] From c9472a7c861ff0a7c4a7ca59dfdcbd18e8bc0543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 18 Aug 2023 15:11:25 +0200 Subject: [PATCH 6/6] [scipy-dev] clean-up --- sklearn/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 6e1cf7ca5102c..20e878256803b 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2271,9 +2271,9 @@ def _check_pos_label_consistency(pos_label, y_true): or np.array_equal(classes, [1]) ) ): - classes_repr = str(classes.tolist()).replace("[", "{").replace("]", "}") + classes_repr = ", ".join([repr(c) for c in classes.tolist()]) raise ValueError( - f"y_true takes value in {classes_repr} and pos_label is not " + f"y_true takes value in {{{classes_repr}}} and pos_label is not " "specified: either make y_true take value in {0, 1} or " "{-1, 1} or pass pos_label explicitly." )