From 35ef4ea9c6f94f909a053cc1a8185df211e96e31 Mon Sep 17 00:00:00 2001 From: qz <2995452226@qq.com> Date: Thu, 9 Jan 2020 22:21:45 +0800 Subject: [PATCH 1/7] make docstring of confusion_matrix more clear --- sklearn/metrics/_classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index cba7f2c2e8fc8..8f35e4733d4d2 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -232,7 +232,8 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, Returns ------- C : ndarray of shape (n_classes, n_classes) - Confusion matrix. + Confusion matrix whose i-th row and j-th column entry corresponds to the number + of samples with true labels in i-th class and the prediced labels in j-th class. References ---------- From b37d305926947cdfe3fedc7373b8e5bb4931e8b0 Mon Sep 17 00:00:00 2001 From: qz <2995452226@qq.com> Date: Thu, 9 Jan 2020 22:32:58 +0800 Subject: [PATCH 2/7] Update _classification.py --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8f35e4733d4d2..37c373fe05cb6 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -233,7 +233,7 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, ------- C : ndarray of shape (n_classes, n_classes) Confusion matrix whose i-th row and j-th column entry corresponds to the number - of samples with true labels in i-th class and the prediced labels in j-th class. + of samples with true labels in i-th class and the prediced labels in j-th class. 
References ---------- From 2db42e10cba2616532c06178c9b1d275cc1633b5 Mon Sep 17 00:00:00 2001 From: Qizhi Jiang Date: Fri, 10 Jan 2020 08:58:08 +0800 Subject: [PATCH 3/7] Update sklearn/metrics/_classification.py Co-Authored-By: Joel Nothman --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 37c373fe05cb6..5fb611eacf307 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -232,7 +232,7 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, Returns ------- C : ndarray of shape (n_classes, n_classes) - Confusion matrix whose i-th row and j-th column entry corresponds to the number + Confusion matrix whose i-th row and j-th column entry indicates the number of samples with true labels in i-th class and the prediced labels in j-th class. References From 3f1a3d291da0785d1f19519abd0efb1ae07ca8c1 Mon Sep 17 00:00:00 2001 From: Qizhi Jiang Date: Fri, 10 Jan 2020 09:10:03 +0800 Subject: [PATCH 4/7] Update sklearn/metrics/_classification.py Co-Authored-By: Joel Nothman --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 5fb611eacf307..4b85808ce419c 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -233,7 +233,7 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, ------- C : ndarray of shape (n_classes, n_classes) Confusion matrix whose i-th row and j-th column entry indicates the number - of samples with true labels in i-th class and the prediced labels in j-th class. + of samples with true label being i-th class and prediced label being j-th class. 
References ---------- From d33a61dd6db45a6564fc92e4459b6c2328ff2a18 Mon Sep 17 00:00:00 2001 From: qz <2995452226@qq.com> Date: Sat, 11 Jan 2020 18:57:38 +0800 Subject: [PATCH 5/7] Update _classification.py --- sklearn/metrics/_classification.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 4b85808ce419c..7f644e7b207aa 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -232,8 +232,9 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, Returns ------- C : ndarray of shape (n_classes, n_classes) - Confusion matrix whose i-th row and j-th column entry indicates the number - of samples with true label being i-th class and prediced label being j-th class. + Confusion matrix whose i-th row and j-th column entry indicates + the number of samples with true label being i-th class and + prediced label being j-th class. References ---------- From e0bee3b83c625fe5306fd011319fd8e766606a7a Mon Sep 17 00:00:00 2001 From: qz <2995452226@qq.com> Date: Sat, 11 Jan 2020 19:14:54 +0800 Subject: [PATCH 6/7] Update _classification.py --- sklearn/metrics/_classification.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 7f644e7b207aa..5e56816ab726f 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -232,9 +232,10 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, Returns ------- C : ndarray of shape (n_classes, n_classes) - Confusion matrix whose i-th row and j-th column entry indicates - the number of samples with true label being i-th class and - prediced label being j-th class. + Confusion matrix whose i-th row and j-th + column entry indicates the number of + samples with true label being i-th class + and predicted label being j-th class. 
References ---------- From c6f275144651580bc6fbfbc24f9664f0deaf3fc2 Mon Sep 17 00:00:00 2001 From: qz <2995452226@qq.com> Date: Sat, 11 Jan 2020 20:44:34 +0800 Subject: [PATCH 7/7] DOC improve random_state docstring model selection module (#15576) --- sklearn/model_selection/_search.py | 18 +++--- sklearn/model_selection/_split.py | 80 ++++++++++++-------------- sklearn/model_selection/_validation.py | 19 +++--- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 934ec0df6b116..ede292c2b6261 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -204,13 +204,12 @@ class ParameterSampler: n_iter : integer Number of parameter settings that are produced. - random_state : int, RandomState instance or None, optional (default=None) + random_state : int, RandomState instance or None, default=None Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Returns ------- @@ -1296,13 +1295,12 @@ class RandomizedSearchCV(BaseSearchCV): verbose : integer Controls the verbosity: the higher, the more messages. - random_state : int, RandomState instance or None, optional, default=None + random_state : int, RandomState instance or None, default=None Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. 
- If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. error_score : 'raise' or numeric Value to assign to the score if an error occurs in estimator fitting. diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index e03d9aa29b3ac..6bab796fb54a7 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -380,12 +380,12 @@ class KFold(_BaseKFold): Whether to shuffle the data before splitting into batches. Note that the samples within each split will not be shuffled. - random_state : int, RandomState instance or None, optional, default=None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. Only used when ``shuffle`` is True. This should be left + random_state : int, RandomState instance or None, default=None + Only used when ``shuffle`` is True. This should be left to None if ``shuffle`` is False. + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Examples -------- @@ -587,12 +587,12 @@ class StratifiedKFold(_BaseKFold): Whether to shuffle each class's samples before splitting into batches. Note that the samples within each split will not be shuffled. - random_state : int, RandomState instance or None, optional, default=None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. Only used when ``shuffle`` is True. 
This should be left + random_state : int, RandomState instance or None, default=None + Only used when ``shuffle`` is True. This should be left to None if ``shuffle`` is False. + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Examples -------- @@ -1090,11 +1090,10 @@ class _RepeatedSplits(metaclass=ABCMeta): n_repeats : int, default=10 Number of times cross-validator needs to be repeated. - random_state : int, RandomState instance or None, optional, default=None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. **cvargs : additional params Constructor parameters for cv. Must not contain random_state @@ -1195,11 +1194,9 @@ class RepeatedKFold(_RepeatedSplits): n_repeats : int, default=10 Number of times cross-validator needs to be repeated. - random_state : int, RandomState instance or None, optional, default=None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. See :term:`Glossary <random_state>`. Examples -------- @@ -1249,9 +1246,10 @@ class RepeatedStratifiedKFold(_RepeatedSplits): n_repeats : int, default=10 Number of times cross-validator needs to be repeated. - random_state : None, int or RandomState, default=None - Random state to be used to generate random state for each - repetition. 
+ random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Examples -------- @@ -1389,11 +1387,10 @@ class ShuffleSplit(BaseShuffleSplit): int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size. - random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Examples -------- @@ -1491,11 +1488,10 @@ class GroupShuffleSplit(ShuffleSplit): int, represents the absolute number of train groups. If None, the value is automatically set to the complement of the test size. - random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Examples -------- @@ -1604,11 +1600,10 @@ class StratifiedShuffleSplit(BaseShuffleSplit): int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size. 
- random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. Examples -------- @@ -2048,11 +2043,10 @@ def train_test_split(*arrays, **options): int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size. - random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + random_state : int, RandomState instance or None, default=None + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. shuffle : boolean, optional (default=True) Whether or not to shuffle the data before splitting. If shuffle=False diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index f841484ce8eb0..6bbe6e0c5ce95 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -1003,11 +1003,10 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. - random_state : int, RandomState instance or None, optional (default=0) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. 
+ random_state : int, RandomState instance or None, default=0 + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. verbose : integer, optional The verbosity level. @@ -1173,11 +1172,11 @@ def learning_curve(estimator, X, y, groups=None, Whether to shuffle training data before taking prefixes of it based on``train_sizes``. - random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. Used when ``shuffle`` is True. + random_state : int, RandomState instance or None, default=None + Used when ``shuffle`` is True. + Pass an int for reproducible output across multiple + function calls. + See :term:`Glossary <random_state>`. error_score : 'raise' or numeric Value to assign to the score if an error occurs in estimator fitting.