Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b80138f

Browse files
genvalen, glemaitre, and thomasjpfan
authored
MAINT Use check_scalar in BaseGradientBoosting (#21632)
Co-authored-by: Guillaume Lemaitre <[email protected]> Co-authored-by: Thomas J. Fan <[email protected]>
1 parent e977238 commit b80138f

File tree

5 files changed

+203
-64
lines changed

5 files changed

+203
-64
lines changed

doc/whats_new/v1.1.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,11 @@ Changelog
246246
:pr:`20811`, :pr:`20567` and :pr:`21814` by
247247
:user:`Christian Lorentzen <lorentzenchr>`.
248248

249+
- |Fix| Change the parameter `validation_fraction` in
250+
:class:`ensemble.BaseGradientBoosting` so that an error is raised if anything
251+
other than a float is passed in as an argument.
252+
:pr:`21632` by :user:`Genesis Valencia <genvalen>`
253+
249254
- |API| Changed the default of :func:`max_features` to 1.0 for
250255
:class:`ensemble.RandomForestRegressor` and to `"sqrt"` for
251256
:class:`ensemble.RandomForestClassifier`. Note that these give the same fit

sklearn/ensemble/_gb.py

Lines changed: 90 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050

5151
from ..utils import check_random_state
5252
from ..utils import check_array
53+
from ..utils import check_scalar
5354
from ..utils import column_or_1d
5455
from ..utils.validation import check_is_fitted, _check_sample_weight
5556
from ..utils.multiclass import check_classification_targets
@@ -265,21 +266,28 @@ def _fit_stage(
265266

266267
def _check_params(self):
267268
"""Check validity of parameters and raise ValueError if not valid."""
268-
if self.n_estimators <= 0:
269-
raise ValueError(
270-
"n_estimators must be greater than 0 but was %r" % self.n_estimators
271-
)
272269

273-
if self.learning_rate <= 0.0:
274-
raise ValueError(
275-
"learning_rate must be greater than 0 but was %r" % self.learning_rate
276-
)
270+
check_scalar(
271+
self.learning_rate,
272+
name="learning_rate",
273+
target_type=numbers.Real,
274+
min_val=0.0,
275+
include_boundaries="neither",
276+
)
277+
278+
check_scalar(
279+
self.n_estimators,
280+
name="n_estimators",
281+
target_type=numbers.Integral,
282+
min_val=1,
283+
include_boundaries="left",
284+
)
277285

278286
if (
279287
self.loss not in self._SUPPORTED_LOSS
280288
or self.loss not in _gb_losses.LOSS_FUNCTIONS
281289
):
282-
raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
290+
raise ValueError(f"Loss {self.loss!r} not supported. ")
283291

284292
# TODO: Remove in v1.2
285293
if self.loss == "ls":
@@ -313,8 +321,14 @@ def _check_params(self):
313321
else:
314322
self.loss_ = loss_class()
315323

316-
if not (0.0 < self.subsample <= 1.0):
317-
raise ValueError("subsample must be in (0,1] but was %r" % self.subsample)
324+
check_scalar(
325+
self.subsample,
326+
name="subsample",
327+
target_type=numbers.Real,
328+
min_val=0.0,
329+
max_val=1.0,
330+
include_boundaries="right",
331+
)
318332

319333
if self.init is not None:
320334
# init must be an estimator or 'zero'
@@ -323,11 +337,17 @@ def _check_params(self):
323337
elif not (isinstance(self.init, str) and self.init == "zero"):
324338
raise ValueError(
325339
"The init parameter must be an estimator or 'zero'. "
326-
"Got init={}".format(self.init)
340+
f"Got init={self.init!r}"
327341
)
328342

329-
if not (0.0 < self.alpha < 1.0):
330-
raise ValueError("alpha must be in (0.0, 1.0) but was %r" % self.alpha)
343+
check_scalar(
344+
self.alpha,
345+
name="alpha",
346+
target_type=numbers.Real,
347+
min_val=0.0,
348+
max_val=1.0,
349+
include_boundaries="neither",
350+
)
331351

332352
if isinstance(self.max_features, str):
333353
if self.max_features == "auto":
@@ -341,29 +361,66 @@ def _check_params(self):
341361
max_features = max(1, int(np.log2(self.n_features_in_)))
342362
else:
343363
raise ValueError(
344-
"Invalid value for max_features: %r. "
345-
"Allowed string values are 'auto', 'sqrt' "
346-
"or 'log2'."
347-
% self.max_features
364+
f"Invalid value for max_features: {self.max_features!r}. "
365+
"Allowed string values are 'auto', 'sqrt' or 'log2'."
348366
)
349367
elif self.max_features is None:
350368
max_features = self.n_features_in_
351369
elif isinstance(self.max_features, numbers.Integral):
370+
check_scalar(
371+
self.max_features,
372+
name="max_features",
373+
target_type=numbers.Integral,
374+
min_val=1,
375+
include_boundaries="left",
376+
)
352377
max_features = self.max_features
353378
else: # float
354-
if 0.0 < self.max_features <= 1.0:
355-
max_features = max(int(self.max_features * self.n_features_in_), 1)
356-
else:
357-
raise ValueError("max_features must be in (0, n_features]")
379+
check_scalar(
380+
self.max_features,
381+
name="max_features",
382+
target_type=numbers.Real,
383+
min_val=0.0,
384+
max_val=1.0,
385+
include_boundaries="right",
386+
)
387+
max_features = max(1, int(self.max_features * self.n_features_in_))
358388

359389
self.max_features_ = max_features
360390

361-
if not isinstance(self.n_iter_no_change, (numbers.Integral, type(None))):
362-
raise ValueError(
363-
"n_iter_no_change should either be None or an integer. %r was passed"
364-
% self.n_iter_no_change
391+
check_scalar(
392+
self.verbose,
393+
name="verbose",
394+
target_type=(numbers.Integral, np.bool_),
395+
min_val=0,
396+
)
397+
398+
check_scalar(
399+
self.validation_fraction,
400+
name="validation_fraction",
401+
target_type=numbers.Real,
402+
min_val=0.0,
403+
max_val=1.0,
404+
include_boundaries="neither",
405+
)
406+
407+
if self.n_iter_no_change is not None:
408+
check_scalar(
409+
self.n_iter_no_change,
410+
name="n_iter_no_change",
411+
target_type=numbers.Integral,
412+
min_val=1,
413+
include_boundaries="left",
365414
)
366415

416+
check_scalar(
417+
self.tol,
418+
name="tol",
419+
target_type=numbers.Real,
420+
min_val=0.0,
421+
include_boundaries="neither",
422+
)
423+
367424
def _init_state(self):
368425
"""Initialize model state and allocate model state data structures."""
369426

@@ -477,6 +534,11 @@ def fit(self, X, y, sample_weight=None, monitor=None):
477534
)
478535

479536
# if not warmstart - clear the estimator state
537+
check_scalar(
538+
self.warm_start,
539+
name="warm_start",
540+
target_type=(numbers.Integral, np.bool_),
541+
)
480542
if not self.warm_start:
481543
self._clear_state()
482544

@@ -499,6 +561,8 @@ def fit(self, X, y, sample_weight=None, monitor=None):
499561
else:
500562
y = self._validate_y(y)
501563

564+
self._check_params()
565+
502566
if self.n_iter_no_change is not None:
503567
stratify = y if is_classifier(self) else None
504568
X, X_val, y, y_val, sample_weight, sample_weight_val = train_test_split(
@@ -523,8 +587,6 @@ def fit(self, X, y, sample_weight=None, monitor=None):
523587
else:
524588
X_val = y_val = sample_weight_val = None
525589

526-
self._check_params()
527-
528590
if not self._is_initialized():
529591
# init state
530592
self._init_state()

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 107 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -80,60 +80,131 @@ def test_classification_toy(loss):
8080
@pytest.mark.parametrize(
8181
"params, err_type, err_msg",
8282
[
83-
({"n_estimators": 0}, ValueError, "n_estimators must be greater than 0"),
84-
({"n_estimators": -1}, ValueError, "n_estimators must be greater than 0"),
85-
({"learning_rate": 0}, ValueError, "learning_rate must be greater than 0"),
86-
({"learning_rate": -1.0}, ValueError, "learning_rate must be greater than 0"),
83+
({"learning_rate": 0}, ValueError, "learning_rate == 0, must be > 0.0"),
84+
(
85+
{"learning_rate": "foo"},
86+
TypeError,
87+
"learning_rate must be an instance of <class 'numbers.Real'>",
88+
),
89+
({"n_estimators": 0}, ValueError, "n_estimators == 0, must be >= 1"),
90+
(
91+
{"n_estimators": 1.5},
92+
TypeError,
93+
"n_estimators must be an instance of <class 'numbers.Integral'>,",
94+
),
8795
({"loss": "foobar"}, ValueError, "Loss 'foobar' not supported"),
96+
({"subsample": 0.0}, ValueError, "subsample == 0.0, must be > 0.0"),
97+
({"subsample": 1.1}, ValueError, "subsample == 1.1, must be <= 1.0"),
8898
(
89-
{"min_samples_split": 0.0},
90-
ValueError,
91-
"min_samples_split == 0.0, must be > 0.0",
99+
{"subsample": "foo"},
100+
TypeError,
101+
"subsample must be an instance of <class 'numbers.Real'>",
102+
),
103+
({"init": {}}, ValueError, "The init parameter must be an estimator or 'zero'"),
104+
({"max_features": 0}, ValueError, "max_features == 0, must be >= 1"),
105+
({"max_features": 0.0}, ValueError, "max_features == 0.0, must be > 0.0"),
106+
({"max_features": 1.1}, ValueError, "max_features == 1.1, must be <= 1.0"),
107+
({"max_features": "foobar"}, ValueError, "Invalid value for max_features."),
108+
({"verbose": -1}, ValueError, "verbose == -1, must be >= 0"),
109+
(
110+
{"verbose": "foo"},
111+
TypeError,
112+
"verbose must be an instance of",
92113
),
114+
({"warm_start": "foo"}, TypeError, "warm_start must be an instance of"),
93115
(
94-
{"min_samples_split": -1.0},
116+
{"validation_fraction": 0.0},
95117
ValueError,
96-
"min_samples_split == -1.0, must be > 0.0",
118+
"validation_fraction == 0.0, must be > 0.0",
97119
),
98120
(
99-
{"min_samples_split": 1.1},
121+
{"validation_fraction": 1.0},
100122
ValueError,
101-
"min_samples_split == 1.1, must be <= 1.0.",
123+
"validation_fraction == 1.0, must be < 1.0",
124+
),
125+
(
126+
{"validation_fraction": "foo"},
127+
TypeError,
128+
"validation_fraction must be an instance of <class 'numbers.Real'>",
129+
),
130+
({"n_iter_no_change": 0}, ValueError, "n_iter_no_change == 0, must be >= 1"),
131+
(
132+
{"n_iter_no_change": 1.5},
133+
TypeError,
134+
"n_iter_no_change must be an instance of <class 'numbers.Integral'>,",
135+
),
136+
({"tol": 0.0}, ValueError, "tol == 0.0, must be > 0.0"),
137+
(
138+
{"tol": "foo"},
139+
TypeError,
140+
"tol must be an instance of <class 'numbers.Real'>,",
102141
),
142+
# The following parameters are checked in BaseDecisionTree
103143
({"min_samples_leaf": 0}, ValueError, "min_samples_leaf == 0, must be >= 1"),
144+
({"min_samples_leaf": 0.0}, ValueError, "min_samples_leaf == 0.0, must be > 0"),
145+
(
146+
{"min_samples_leaf": "foo"},
147+
TypeError,
148+
"min_samples_leaf must be an instance of <class 'numbers.Real'>",
149+
),
150+
({"min_samples_split": 1}, ValueError, "min_samples_split == 1, must be >= 2"),
104151
(
105-
{"min_samples_leaf": -1.0},
152+
{"min_samples_split": 0.0},
106153
ValueError,
107-
"min_samples_leaf == -1.0, must be > 0.0.",
154+
"min_samples_split == 0.0, must be > 0.0",
108155
),
109156
(
110-
{"min_weight_fraction_leaf": -1.0},
157+
{"min_samples_split": 1.1},
111158
ValueError,
112-
"min_weight_fraction_leaf == -1.0, must be >= 0",
159+
"min_samples_split == 1.1, must be <= 1.0",
113160
),
114161
(
115-
{"min_weight_fraction_leaf": 0.6},
162+
{"min_samples_split": "foo"},
163+
TypeError,
164+
"min_samples_split must be an instance of <class 'numbers.Real'>",
165+
),
166+
(
167+
{"min_weight_fraction_leaf": -1},
116168
ValueError,
117-
"min_weight_fraction_leaf == 0.6, must be <= 0.5.",
169+
"min_weight_fraction_leaf == -1, must be >= 0.0",
118170
),
119-
({"subsample": 0.0}, ValueError, r"subsample must be in \(0,1\]"),
120-
({"subsample": 1.1}, ValueError, r"subsample must be in \(0,1\]"),
121-
({"subsample": -0.1}, ValueError, r"subsample must be in \(0,1\]"),
122-
({"max_depth": -0.1}, TypeError, "max_depth must be an instance of"),
123-
({"max_depth": 0}, ValueError, "max_depth == 0, must be >= 1."),
124-
({"init": {}}, ValueError, "The init parameter must be an estimator or 'zero'"),
125-
({"max_features": "invalid"}, ValueError, "Invalid value for max_features:"),
126-
({"max_features": 0}, ValueError, "max_features == 0, must be >= 1"),
127-
({"max_features": 100}, ValueError, "max_features == 100, must be <="),
128171
(
129-
{"max_features": -0.1},
172+
{"min_weight_fraction_leaf": 0.6},
130173
ValueError,
131-
r"max_features must be in \(0, n_features\]",
174+
"min_weight_fraction_leaf == 0.6, must be <= 0.5",
132175
),
133176
(
134-
{"n_iter_no_change": "invalid"},
177+
{"min_weight_fraction_leaf": "foo"},
178+
TypeError,
179+
"min_weight_fraction_leaf must be an instance of <class 'numbers.Real'>",
180+
),
181+
({"max_leaf_nodes": 0}, ValueError, "max_leaf_nodes == 0, must be >= 2"),
182+
(
183+
{"max_leaf_nodes": 1.5},
184+
TypeError,
185+
"max_leaf_nodes must be an instance of <class 'numbers.Integral'>",
186+
),
187+
({"max_depth": -1}, ValueError, "max_depth == -1, must be >= 1"),
188+
(
189+
{"max_depth": 1.1},
190+
TypeError,
191+
"max_depth must be an instance of <class 'numbers.Integral'>",
192+
),
193+
(
194+
{"min_impurity_decrease": -1},
135195
ValueError,
136-
"n_iter_no_change should either be",
196+
"min_impurity_decrease == -1, must be >= 0.0",
197+
),
198+
(
199+
{"min_impurity_decrease": "foo"},
200+
TypeError,
201+
"min_impurity_decrease must be an instance of <class 'numbers.Real'>",
202+
),
203+
({"ccp_alpha": -1.0}, ValueError, "ccp_alpha == -1.0, must be >= 0.0"),
204+
(
205+
{"ccp_alpha": "foo"},
206+
TypeError,
207+
"ccp_alpha must be an instance of <class 'numbers.Real'>",
137208
),
138209
({"criterion": "mae"}, ValueError, "criterion='mae' is not supported."),
139210
],
@@ -158,8 +229,10 @@ def test_gbdt_parameter_checks(GradientBoosting, X, y, params, err_type, err_msg
158229
@pytest.mark.parametrize(
159230
"params, err_msg",
160231
[
161-
({"loss": "huber", "alpha": 1.2}, r"alpha must be in \(0.0, 1.0\)"),
162-
({"loss": "quantile", "alpha": 1.2}, r"alpha must be in \(0.0, 1.0\)"),
232+
({"loss": "huber", "alpha": 0.0}, "alpha == 0.0, must be > 0.0"),
233+
({"loss": "quantile", "alpha": 0.0}, "alpha == 0.0, must be > 0.0"),
234+
({"loss": "huber", "alpha": 1.2}, "alpha == 1.2, must be < 1.0"),
235+
({"loss": "quantile", "alpha": 1.2}, "alpha == 1.2, must be < 1.0"),
163236
],
164237
)
165238
def test_gbdt_loss_alpha_error(params, err_msg):
@@ -1389,7 +1462,7 @@ def test_early_stopping_n_classes():
13891462
X = [[1]] * 10
13901463
y = [0, 0] + [1] * 8 # only 2 negative class over 10 samples
13911464
gb = GradientBoostingClassifier(
1392-
n_iter_no_change=5, random_state=0, validation_fraction=8
1465+
n_iter_no_change=5, random_state=0, validation_fraction=0.8
13931466
)
13941467
with pytest.raises(
13951468
ValueError, match="The training data after the early stopping split"
@@ -1398,7 +1471,7 @@ def test_early_stopping_n_classes():
13981471

13991472
# No error if we let training data be big enough
14001473
gb = GradientBoostingClassifier(
1401-
n_iter_no_change=5, random_state=0, validation_fraction=4
1474+
n_iter_no_change=5, random_state=0, validation_fraction=0.4
14021475
)
14031476

14041477

0 commit comments

Comments (0)