From 1565854b48ce9d8264080b2c1feba528e32bc9e9 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Sun, 2 Jan 2022 09:47:05 -0500
Subject: [PATCH 1/9] adding valid intervals for SGDClassifier class parameters

---
 sklearn/linear_model/_stochastic_gradient.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index c07232b6c23fe..21bae93a05d85 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -955,11 +955,13 @@ class SGDClassifier(BaseSGDClassifier):
         value, the stronger the regularization.
         Also used to compute the learning rate when set to `learning_rate` is
         set to 'optimal'.
+        Values should be in the range `[0.0, inf)`.

     l1_ratio : float, default=0.15
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Only used if `penalty` is 'elasticnet'.
+        Values should be in the range `[0.0, 1.0]`.

     fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
@@ -969,6 +971,7 @@ class SGDClassifier(BaseSGDClassifier):
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
+        Values should be in the range `[1, inf)`.

         .. versionadded:: 0.19

@@ -978,6 +981,7 @@ class SGDClassifier(BaseSGDClassifier):
         epochs.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
+        Values should be in the range `(0.0, inf)`.

         .. versionadded:: 0.19

@@ -986,6 +990,7 @@ class SGDClassifier(BaseSGDClassifier):

     verbose : int, default=0
         The verbosity level.
+        Values should be in the range `[0, inf)`.

     epsilon : float, default=0.1
         Epsilon in the epsilon-insensitive loss functions; only if `loss` is

From 632fdadd295adf306ab3d1938a31e68deaa9c2b1 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Sun, 2 Jan 2022 13:17:08 -0500
Subject: [PATCH 2/9] adding more intervals

---
 sklearn/linear_model/_stochastic_gradient.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 21bae93a05d85..1bc49493b4d60 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -999,6 +999,7 @@ class SGDClassifier(BaseSGDClassifier):
         important to get the prediction exactly right.
         For epsilon-insensitive, any differences between the current prediction
         and the correct label are ignored if they are less than this threshold.
+        Values should be in the range `[0.0, inf)`.

     n_jobs : int, default=None
         The number of CPUs to use to do the OVA (One Versus All, for
@@ -1006,11 +1007,13 @@ class SGDClassifier(BaseSGDClassifier):
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
+        Values should be in the range `[-1, inf)`.

     random_state : int, RandomState instance, default=None
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
+        Values should be in the range `[0, inf)`.

     learning_rate : str, default='optimal'
         The learning rate schedule:
@@ -1031,6 +1034,7 @@ class SGDClassifier(BaseSGDClassifier):
         The initial learning rate for the 'constant', 'invscaling' or
         'adaptive' schedules.
         The default value is 0.0 as eta0 is not used by the default schedule
         'optimal'.
+        Values should be in the range `(0.0, inf)`.

     power_t : float, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].

From 3dea8d7697ce395fb0bfa1934ed091188873e263 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Mon, 3 Jan 2022 10:22:40 -0500
Subject: [PATCH 3/9] tol can have value of 0.0

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 1bc49493b4d60..f7314eaaadfc0 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -981,7 +981,7 @@ class SGDClassifier(BaseSGDClassifier):
         epochs.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
-        Values should be in the range `(0.0, inf)`.
+        Values should be in the range `[0.0, inf)`.

         .. versionadded:: 0.19

From 1420e52dfd3ff8903d6940135ab054a79af95142 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Mon, 3 Jan 2022 10:23:40 -0500
Subject: [PATCH 4/9] for n_jobs, remove interval range

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index f7314eaaadfc0..89764692dfb90 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1007,7 +1007,6 @@ class SGDClassifier(BaseSGDClassifier):
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
-        Values should be in the range `[-1, inf)`.

     random_state : int, RandomState instance, default=None
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.

From f9ad2feefc7379cd84b6685066131fb4d51abed8 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Mon, 3 Jan 2022 10:24:34 -0500
Subject: [PATCH 5/9] random_state has upper bound

Reminder: update glossary as well

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 89764692dfb90..82b7d909606bb 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1012,7 +1012,7 @@ class SGDClassifier(BaseSGDClassifier):
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-        Values should be in the range `[0, inf)`.
+        Integer values should be in the range `[0, 2**32 - 1]`.

     learning_rate : str, default='optimal'
         The learning rate schedule:

From 0358485ffd52f6efee4488ca423b146e9b8da879 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Mon, 3 Jan 2022 10:32:56 -0500
Subject: [PATCH 6/9] adding int range for random_state in glossary

---
 doc/glossary.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/glossary.rst b/doc/glossary.rst
index 2b4c6af0d1866..b79d591494b01 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1604,6 +1604,7 @@ functions or non-estimator constructors.
             number of different distinct random seeds. Popular integer
             random seeds are 0 and `42 `_.
+            Integer values should be in the range `[0, 2**32 - 1]`.

         A :class:`numpy.random.RandomState` instance
             Use the provided random state, only affecting other users

From 0d4266f1b545e30b29d7c00957e3738eb4ec23a1 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Wed, 5 Jan 2022 12:11:09 -0500
Subject: [PATCH 7/9] added interval ranges for 3 parameters

---
 sklearn/linear_model/_stochastic_gradient.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 82b7d909606bb..07bce0d2392ad 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1019,12 +1019,12 @@ class SGDClassifier(BaseSGDClassifier):

         - 'constant': `eta = eta0`
         - 'optimal': `eta = 1.0 / (alpha * (t + t0))`
-          where t0 is chosen by a heuristic proposed by Leon Bottou.
+          where `t0` is chosen by a heuristic proposed by Leon Bottou.
         - 'invscaling': `eta = eta0 / pow(t, power_t)`
-        - 'adaptive': eta = eta0, as long as the training keeps decreasing.
+        - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.
           Each time n_iter_no_change consecutive epochs fail to decrease the
           training loss by tol or fail to increase validation score by tol if
-          early_stopping is True, the current learning rate is divided by 5.
+          early_stopping is `True`, the current learning rate is divided by 5.

         .. versionadded:: 0.20
             Added 'adaptive' option
@@ -1037,10 +1037,11 @@ class SGDClassifier(BaseSGDClassifier):

     power_t : float, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].
+        Values should be in the range `(-inf, inf)`.

     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
-        score is not improving. If set to True, it will automatically set aside
+        score is not improving. If set to `True`, it will automatically set aside
         a stratified fraction of training data as validation and terminate
         training when validation score returned by the `score` method is not
         improving by at least tol for n_iter_no_change consecutive epochs.
@@ -1052,6 +1053,7 @@ class SGDClassifier(BaseSGDClassifier):
         The proportion of training data to set aside as validation set for
         early stopping. Must be between 0 and 1.
         Only used if `early_stopping` is True.
+        Values should be in the range `(0.0, 1.0)`.

         .. versionadded:: 0.20
             Added 'validation_fraction' option
@@ -1061,6 +1063,7 @@ class SGDClassifier(BaseSGDClassifier):
         fitting.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
+        Integer values should be in the range `[1, max_iter)`.

         .. versionadded:: 0.20
             Added 'n_iter_no_change' option
@@ -1089,11 +1092,12 @@ class SGDClassifier(BaseSGDClassifier):
         existing counter.

     average : bool or int, default=False
-        When set to True, computes the averaged SGD weights across all
+        When set to `True`, computes the averaged SGD weights across all
         updates and stores the result in the ``coef_`` attribute. If set to
         an int greater than 1, averaging will begin once the total number of
         samples seen reaches `average`. So ``average=10`` will begin
         averaging after seeing 10 samples.
+        Integer values should be in the range `[1, n_samples]`.

     Attributes
     ----------

From 45c14d4d5a23e76bf4ed92f308af426614d0de06 Mon Sep 17 00:00:00 2001
From: Reshama Shaikh
Date: Fri, 7 Jan 2022 09:35:47 -0500
Subject: [PATCH 8/9] brackets around parameter early_stopping

Co-authored-by: Olivier Grisel
---
 sklearn/linear_model/_stochastic_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 07bce0d2392ad..cf3a7de7756a0 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1024,7 +1024,7 @@ class SGDClassifier(BaseSGDClassifier):
         - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.
           Each time n_iter_no_change consecutive epochs fail to decrease the
           training loss by tol or fail to increase validation score by tol if
-          early_stopping is `True`, the current learning rate is divided by 5.
+          `early_stopping` is `True`, the current learning rate is divided by 5.

         .. versionadded:: 0.20
             Added 'adaptive' option

From dfd447e31352cdd1d49879bdfa54b3fe449397b5 Mon Sep 17 00:00:00 2001
From: reshamas
Date: Thu, 13 Jan 2022 14:34:48 -0500
Subject: [PATCH 9/9] change wording 'should be'-->'must be'

---
 doc/glossary.rst                             |  2 +-
 sklearn/linear_model/_stochastic_gradient.py | 24 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/doc/glossary.rst b/doc/glossary.rst
index b79d591494b01..0d67b224ece0f 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1604,7 +1604,7 @@ functions or non-estimator constructors.
             number of different distinct random seeds. Popular integer
             random seeds are 0 and `42 `_.
-            Integer values should be in the range `[0, 2**32 - 1]`.
+            Integer values must be in the range `[0, 2**32 - 1]`.

         A :class:`numpy.random.RandomState` instance
             Use the provided random state, only affecting other users
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index cf3a7de7756a0..8febc85b7946c 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -955,13 +955,13 @@ class SGDClassifier(BaseSGDClassifier):
         value, the stronger the regularization.
         Also used to compute the learning rate when set to `learning_rate` is
         set to 'optimal'.
-        Values should be in the range `[0.0, inf)`.
+        Values must be in the range `[0.0, inf)`.

     l1_ratio : float, default=0.15
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Only used if `penalty` is 'elasticnet'.
-        Values should be in the range `[0.0, 1.0]`.
+        Values must be in the range `[0.0, 1.0]`.

     fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
@@ -971,7 +971,7 @@ class SGDClassifier(BaseSGDClassifier):
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
-        Values should be in the range `[1, inf)`.
+        Values must be in the range `[1, inf)`.

         .. versionadded:: 0.19

@@ -981,7 +981,7 @@ class SGDClassifier(BaseSGDClassifier):
         epochs.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
-        Values should be in the range `[0.0, inf)`.
+        Values must be in the range `[0.0, inf)`.

         .. versionadded:: 0.19

@@ -990,7 +990,7 @@ class SGDClassifier(BaseSGDClassifier):

     verbose : int, default=0
         The verbosity level.
-        Values should be in the range `[0, inf)`.
+        Values must be in the range `[0, inf)`.

     epsilon : float, default=0.1
         Epsilon in the epsilon-insensitive loss functions; only if `loss` is
@@ -999,7 +999,7 @@ class SGDClassifier(BaseSGDClassifier):
         important to get the prediction exactly right.
         For epsilon-insensitive, any differences between the current prediction
         and the correct label are ignored if they are less than this threshold.
-        Values should be in the range `[0.0, inf)`.
+        Values must be in the range `[0.0, inf)`.

     n_jobs : int, default=None
         The number of CPUs to use to do the OVA (One Versus All, for
@@ -1012,7 +1012,7 @@ class SGDClassifier(BaseSGDClassifier):
         Used for shuffling the data, when ``shuffle`` is set to ``True``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-        Integer values should be in the range `[0, 2**32 - 1]`.
+        Integer values must be in the range `[0, 2**32 - 1]`.

     learning_rate : str, default='optimal'
         The learning rate schedule:
@@ -1033,11 +1033,11 @@ class SGDClassifier(BaseSGDClassifier):
         The initial learning rate for the 'constant', 'invscaling' or
         'adaptive' schedules.
         The default value is 0.0 as eta0 is not used by the default schedule
         'optimal'.
-        Values should be in the range `(0.0, inf)`.
+        Values must be in the range `(0.0, inf)`.

     power_t : float, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].
-        Values should be in the range `(-inf, inf)`.
+        Values must be in the range `(-inf, inf)`.

     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
@@ -1053,7 +1053,7 @@ class SGDClassifier(BaseSGDClassifier):
         The proportion of training data to set aside as validation set for
         early stopping. Must be between 0 and 1.
         Only used if `early_stopping` is True.
-        Values should be in the range `(0.0, 1.0)`.
+        Values must be in the range `(0.0, 1.0)`.

         .. versionadded:: 0.20
             Added 'validation_fraction' option
@@ -1063,7 +1063,7 @@ class SGDClassifier(BaseSGDClassifier):
         fitting.
         Convergence is checked against the training loss or the
         validation loss depending on the `early_stopping` parameter.
-        Integer values should be in the range `[1, max_iter)`.
+        Integer values must be in the range `[1, max_iter)`.

         .. versionadded:: 0.20
             Added 'n_iter_no_change' option
@@ -1097,7 +1097,7 @@ class SGDClassifier(BaseSGDClassifier):
         an int greater than 1, averaging will begin once the total number of
         samples seen reaches `average`. So ``average=10`` will begin
         averaging after seeing 10 samples.
-        Integer values should be in the range `[1, n_samples]`.
+        Integer values must be in the range `[1, n_samples]`.

     Attributes
     ----------
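
For context, here is a minimal usage sketch (not part of the patch series) showing SGDClassifier instantiated with values that sit inside the intervals documented above. The toy data and the particular parameter values are invented for illustration only.

    # Illustrative only: parameter values chosen to fall inside the documented ranges.
    from sklearn.linear_model import SGDClassifier

    X = [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]
    y = [0, 0, 1, 1]

    clf = SGDClassifier(
        alpha=0.0001,             # must be in [0.0, inf)
        l1_ratio=0.15,            # must be in [0.0, 1.0]; only used with penalty='elasticnet'
        max_iter=1000,            # must be in [1, inf)
        tol=1e-3,                 # must be in [0.0, inf)
        epsilon=0.1,              # must be in [0.0, inf)
        random_state=42,          # integer seeds must be in [0, 2**32 - 1]
        power_t=0.5,              # any float, (-inf, inf)
        validation_fraction=0.1,  # must be in (0.0, 1.0); only used when early_stopping=True
        n_iter_no_change=5,       # must be in [1, max_iter)
    )
    clf.fit(X, y)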