From 1565854b48ce9d8264080b2c1feba528e32bc9e9 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Sun, 2 Jan 2022 09:47:05 -0500
Subject: [PATCH 1/9] adding valid intervals for SGDClassifier class parameters

---
 sklearn/linear_model/_stochastic_gradient.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index c07232b6c23fe..21bae93a05d85 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -955,11 +955,13 @@ class SGDClassifier(BaseSGDClassifier):
         value, the stronger the regularization.
         Also used to compute the learning rate when set to `learning_rate` is
         set to 'optimal'.
+        Values should be in the range `[0.0, inf)`.

     l1_ratio : float, default=0.15
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Only used if `penalty` is 'elasticnet'.
+        Values should be in the range `[0.0, 1.0]`.

     fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
@@ -969,6 +971,7 @@ class SGDClassifier(BaseSGDClassifier):
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
+        Values should be in the range `[1, inf)`.

         .. versionadded:: 0.19

@@ -978,6 +981,7 @@ class SGDClassifier(BaseSGDClassifier):
         epochs.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
+        Values should be in the range `(0.0, inf)`.

         .. versionadded:: 0.19

@@ -986,6 +990,7 @@ class SGDClassifier(BaseSGDClassifier):

     verbose : int, default=0
         The verbosity level.
+        Values should be in the range `[0, inf)`.

     epsilon : float, default=0.1
         Epsilon in the epsilon-insensitive loss functions; only if `loss` is

From 632fdadd295adf306ab3d1938a31e68deaa9c2b1 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Sun, 2 Jan 2022 13:17:08 -0500
Subject: [PATCH 2/9] adding more intervals

---
 sklearn/linear_model/_stochastic_gradient.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 21bae93a05d85..1bc49493b4d60 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -999,6 +999,7 @@ class SGDClassifier(BaseSGDClassifier):
         important to get the prediction exactly right.
         For epsilon-insensitive, any differences between the current prediction
         and the correct label are ignored if they are less than this threshold.
+        Values should be in the range `[0.0, inf)`.

     n_jobs : int, default=None
         The number of CPUs to use to do the OVA (One Versus All, for
@@ -1006,11 +1007,13 @@ class SGDClassifier(BaseSGDClassifier):
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
+        Values should be in the range `[-1, inf)`.

     random_state : int, RandomState instance, default=None
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
+        Values should be in the range `[0, inf)`.

     learning_rate : str, default='optimal'
         The learning rate schedule:
@@ -1031,6 +1034,7 @@ class SGDClassifier(BaseSGDClassifier):
         The initial learning rate for the 'constant', 'invscaling' or
         'adaptive' schedules.
         The default value is 0.0 as eta0 is not used by the default schedule
         'optimal'.
+        Values should be in the range `(0.0, inf)`.

     power_t : float, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].

From 3dea8d7697ce395fb0bfa1934ed091188873e263 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Mon, 3 Jan 2022 10:22:40 -0500
Subject: [PATCH 3/9] tol can have value of 0.0

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 1bc49493b4d60..f7314eaaadfc0 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -981,7 +981,7 @@ class SGDClassifier(BaseSGDClassifier):
         epochs.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
-        Values should be in the range `(0.0, inf)`.
+        Values should be in the range `[0.0, inf)`.

         .. versionadded:: 0.19

From 1420e52dfd3ff8903d6940135ab054a79af95142 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Mon, 3 Jan 2022 10:23:40 -0500
Subject: [PATCH 4/9] for n_jobs, remove interval range

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index f7314eaaadfc0..89764692dfb90 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1007,7 +1007,6 @@ class SGDClassifier(BaseSGDClassifier):
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
-        Values should be in the range `[-1, inf)`.

     random_state : int, RandomState instance, default=None
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.

From f9ad2feefc7379cd84b6685066131fb4d51abed8 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Mon, 3 Jan 2022 10:24:34 -0500
Subject: [PATCH 5/9] random_state has upper bound

Reminder: update glossary as well

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 89764692dfb90..82b7d909606bb 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1012,7 +1012,7 @@ class SGDClassifier(BaseSGDClassifier):
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-        Values should be in the range `[0, inf)`.
+        Integer values should be in the range `[0, 2**32 - 1]`.

     learning_rate : str, default='optimal'
         The learning rate schedule:

From 0358485ffd52f6efee4488ca423b146e9b8da879 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Mon, 3 Jan 2022 10:32:56 -0500
Subject: [PATCH 6/9] adding int range for random_state in glossary

---
 doc/glossary.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/glossary.rst b/doc/glossary.rst
index 2b4c6af0d1866..b79d591494b01 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1604,6 +1604,7 @@ functions or non-estimator constructors.
             number of different distinct random seeds. Popular integer
             random seeds are 0 and `42 `_.
+            Integer values should be in the range `[0, 2**32 - 1]`.

         A :class:`numpy.random.RandomState` instance
             Use the provided random state, only affecting other users

From 0d4266f1b545e30b29d7c00957e3738eb4ec23a1 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Wed, 5 Jan 2022 12:11:09 -0500
Subject: [PATCH 7/9] added interval ranges for 3 parameters

---
 sklearn/linear_model/_stochastic_gradient.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 82b7d909606bb..07bce0d2392ad 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1019,12 +1019,12 @@ class SGDClassifier(BaseSGDClassifier):

         - 'constant': `eta = eta0`
         - 'optimal': `eta = 1.0 / (alpha * (t + t0))`
-          where t0 is chosen by a heuristic proposed by Leon Bottou.
+          where `t0` is chosen by a heuristic proposed by Leon Bottou.
         - 'invscaling': `eta = eta0 / pow(t, power_t)`
-        - 'adaptive': eta = eta0, as long as the training keeps decreasing.
+        - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.
           Each time n_iter_no_change consecutive epochs fail to decrease the
           training loss by tol or fail to increase validation score by tol if
-          early_stopping is True, the current learning rate is divided by 5.
+          early_stopping is `True`, the current learning rate is divided by 5.

         .. versionadded:: 0.20
             Added 'adaptive' option
@@ -1037,10 +1037,11 @@ class SGDClassifier(BaseSGDClassifier):

     power_t : float, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].
+        Values should be in the range `(-inf, inf)`.

     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
-        score is not improving. If set to True, it will automatically set aside
+        score is not improving. If set to `True`, it will automatically set aside
         a stratified fraction of training data as validation and terminate
         training when validation score returned by the `score` method is not
         improving by at least tol for n_iter_no_change consecutive epochs.
@@ -1052,6 +1053,7 @@ class SGDClassifier(BaseSGDClassifier):
         The proportion of training data to set aside as validation set for
         early stopping. Must be between 0 and 1.
         Only used if `early_stopping` is True.
+        Values should be in the range `(0.0, 1.0)`.

         .. versionadded:: 0.20
             Added 'validation_fraction' option
@@ -1061,6 +1063,7 @@ class SGDClassifier(BaseSGDClassifier):
         fitting.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
+        Integer values should be in the range `[1, max_iter)`.

         .. versionadded:: 0.20
             Added 'n_iter_no_change' option
@@ -1089,11 +1092,12 @@ class SGDClassifier(BaseSGDClassifier):
         existing counter.

     average : bool or int, default=False
-        When set to True, computes the averaged SGD weights across all
+        When set to `True`, computes the averaged SGD weights across all
         updates and stores the result in the ``coef_`` attribute. If set to
         an int greater than 1, averaging will begin once the total number of
         samples seen reaches `average`. So ``average=10`` will begin
         averaging after seeing 10 samples.
+        Integer values should be in the range `[1, n_samples]`.

     Attributes
     ----------

From 45c14d4d5a23e76bf4ed92f308af426614d0de06 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Fri, 7 Jan 2022 09:35:47 -0500
Subject: [PATCH 8/9] brackets around parameter early_stopping

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 07bce0d2392ad..cf3a7de7756a0 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1024,7 +1024,7 @@ class SGDClassifier(BaseSGDClassifier):
         - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.
           Each time n_iter_no_change consecutive epochs fail to decrease the
           training loss by tol or fail to increase validation score by tol if
-          early_stopping is `True`, the current learning rate is divided by 5.
+          `early_stopping` is `True`, the current learning rate is divided by 5.

         .. versionadded:: 0.20
             Added 'adaptive' option

From dfd447e31352cdd1d49879bdfa54b3fe449397b5 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Thu, 13 Jan 2022 14:34:48 -0500
Subject: [PATCH 9/9] change wording 'should be'-->'must be'

---
 doc/glossary.rst                             |  2 +-
 sklearn/linear_model/_stochastic_gradient.py | 24 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/doc/glossary.rst b/doc/glossary.rst
index b79d591494b01..0d67b224ece0f 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1604,7 +1604,7 @@ functions or non-estimator constructors.
             number of different distinct random seeds. Popular integer
             random seeds are 0 and `42 `_.
-            Integer values should be in the range `[0, 2**32 - 1]`.
+            Integer values must be in the range `[0, 2**32 - 1]`.

         A :class:`numpy.random.RandomState` instance
             Use the provided random state, only affecting other users
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index cf3a7de7756a0..8febc85b7946c 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -955,13 +955,13 @@ class SGDClassifier(BaseSGDClassifier):
         value, the stronger the regularization.
         Also used to compute the learning rate when set to `learning_rate` is
         set to 'optimal'.
-        Values should be in the range `[0.0, inf)`.
+        Values must be in the range `[0.0, inf)`.

     l1_ratio : float, default=0.15
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Only used if `penalty` is 'elasticnet'.
-        Values should be in the range `[0.0, 1.0]`.
+        Values must be in the range `[0.0, 1.0]`.

     fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
@@ -971,7 +971,7 @@ class SGDClassifier(BaseSGDClassifier):
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
-        Values should be in the range `[1, inf)`.
+        Values must be in the range `[1, inf)`.

         .. versionadded:: 0.19

@@ -981,7 +981,7 @@ class SGDClassifier(BaseSGDClassifier):
         epochs.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
-        Values should be in the range `[0.0, inf)`.
+        Values must be in the range `[0.0, inf)`.

         .. versionadded:: 0.19

@@ -990,7 +990,7 @@ class SGDClassifier(BaseSGDClassifier):

     verbose : int, default=0
         The verbosity level.
-        Values should be in the range `[0, inf)`.
+        Values must be in the range `[0, inf)`.

     epsilon : float, default=0.1
         Epsilon in the epsilon-insensitive loss functions; only if `loss` is
@@ -999,7 +999,7 @@ class SGDClassifier(BaseSGDClassifier):
         important to get the prediction exactly right.
         For epsilon-insensitive, any differences between the current prediction
         and the correct label are ignored if they are less than this threshold.
-        Values should be in the range `[0.0, inf)`.
+        Values must be in the range `[0.0, inf)`.

     n_jobs : int, default=None
         The number of CPUs to use to do the OVA (One Versus All, for
@@ -1012,7 +1012,7 @@ class SGDClassifier(BaseSGDClassifier):
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-        Integer values should be in the range `[0, 2**32 - 1]`.
+        Integer values must be in the range `[0, 2**32 - 1]`.

     learning_rate : str, default='optimal'
         The learning rate schedule:
@@ -1033,11 +1033,11 @@ class SGDClassifier(BaseSGDClassifier):
         The initial learning rate for the 'constant', 'invscaling' or
         'adaptive' schedules.
         The default value is 0.0 as eta0 is not used by the default schedule
         'optimal'.
-        Values should be in the range `(0.0, inf)`.
+        Values must be in the range `(0.0, inf)`.

     power_t : float, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].
-        Values should be in the range `(-inf, inf)`.
+        Values must be in the range `(-inf, inf)`.

     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
@@ -1053,7 +1053,7 @@ class SGDClassifier(BaseSGDClassifier):
         The proportion of training data to set aside as validation set for
         early stopping. Must be between 0 and 1.
         Only used if `early_stopping` is True.
-        Values should be in the range `(0.0, 1.0)`.
+        Values must be in the range `(0.0, 1.0)`.

         .. versionadded:: 0.20
             Added 'validation_fraction' option
@@ -1063,7 +1063,7 @@ class SGDClassifier(BaseSGDClassifier):
         fitting.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
-        Integer values should be in the range `[1, max_iter)`.
+        Integer values must be in the range `[1, max_iter)`.

         .. versionadded:: 0.20
             Added 'n_iter_no_change' option
@@ -1097,7 +1097,7 @@ class SGDClassifier(BaseSGDClassifier):
         an int greater than 1, averaging will begin once the total number of
         samples seen reaches `average`. So ``average=10`` will begin
         averaging after seeing 10 samples.
-        Integer values should be in the range `[1, n_samples]`.
+        Integer values must be in the range `[1, n_samples]`.

     Attributes
     ----------
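
For context, here is a minimal usage sketch (not part of the patch series) showing SGDClassifier instantiated with values that sit inside the intervals documented above. The toy data and the particular parameter values are invented for illustration only.

    # Illustrative only: parameter values chosen to fall inside the documented ranges.
    from sklearn.linear_model import SGDClassifier

    X = [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]
    y = [0, 0, 1, 1]

    clf = SGDClassifier(
        alpha=0.0001,             # must be in [0.0, inf)
        l1_ratio=0.15,            # must be in [0.0, 1.0]; only used with penalty='elasticnet'
        max_iter=1000,            # must be in [1, inf)
        tol=1e-3,                 # must be in [0.0, inf)
        epsilon=0.1,              # must be in [0.0, inf)
        random_state=42,          # integer seeds must be in [0, 2**32 - 1]
        power_t=0.5,              # any float, (-inf, inf)
        validation_fraction=0.1,  # must be in (0.0, 1.0); only used when early_stopping=True
        n_iter_no_change=5,       # must be in [1, max_iter)
    )
    clf.fit(X, y)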