diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index af9225933100c..38f021ec5f82d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -270,18 +270,21 @@ def _check_categories(self, X): if missing.any(): categories = categories[~missing] + if hasattr(self, "feature_names_in_"): + feature_name = f"'{self.feature_names_in_[f_idx]}'" + else: + feature_name = f"at index {f_idx}" + if categories.size > self.max_bins: raise ValueError( - f"Categorical feature at index {f_idx} is " - "expected to have a " - f"cardinality <= {self.max_bins}" + f"Categorical feature {feature_name} is expected to " + f"have a cardinality <= {self.max_bins}" ) if (categories >= self.max_bins).any(): raise ValueError( - f"Categorical feature at index {f_idx} is " - "expected to be encoded with " - f"values < {self.max_bins}" + f"Categorical feature {feature_name} is expected to " + f"be encoded with values < {self.max_bins}" ) else: categories = None diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index d1a8f56bbd479..8756d27869e36 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1141,20 +1141,32 @@ def test_categorical_spec_no_categories(Est, categorical_features, as_array): @pytest.mark.parametrize( "Est", (HistGradientBoostingClassifier, HistGradientBoostingRegressor) ) -def test_categorical_bad_encoding_errors(Est): +@pytest.mark.parametrize( + "use_pandas, feature_name", [(False, "at index 0"), (True, "'f0'")] +) +def test_categorical_bad_encoding_errors(Est, use_pandas, feature_name): # Test errors when categories are encoded incorrectly gb = Est(categorical_features=[True], max_bins=2) - X = np.array([[0, 1, 2]]).T + if use_pandas: + pd = pytest.importorskip("pandas") + X = pd.DataFrame({"f0": [0, 1, 2]}) + else: + X = np.array([[0, 1, 2]]).T y = np.arange(3) - msg = "Categorical feature at index 0 is expected to have a cardinality <= 2" + msg = f"Categorical feature {feature_name} is expected to have a cardinality <= 2" with pytest.raises(ValueError, match=msg): gb.fit(X, y) - X = np.array([[0, 2]]).T + if use_pandas: + X = pd.DataFrame({"f0": [0, 2]}) + else: + X = np.array([[0, 2]]).T y = np.arange(2) - msg = "Categorical feature at index 0 is expected to be encoded with values < 2" + msg = ( + f"Categorical feature {feature_name} is expected to be encoded with values < 2" + ) with pytest.raises(ValueError, match=msg): gb.fit(X, y)