From 1caf2ad1ebe9373dbbc5297d41dce324c78842fc Mon Sep 17 00:00:00 2001 From: Scott Gigante <84813314+scottgigante-immunai@users.noreply.github.com> Date: Fri, 18 Nov 2022 11:05:20 -0500 Subject: [PATCH 1/5] Improve docs of BaseLabelPropagation.transduction_ --- sklearn/semi_supervised/_label_propagation.py | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 45324715224d0..43c62e9b26fab 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -105,6 +105,34 @@ class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta): ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. + + Attributes + ---------- + X_ : ndarray of shape (n_samples, n_features) + Input array. + + classes_ : ndarray of shape (n_classes,) + The distinct labels used in classifying instances. + + label_distributions_ : ndarray of shape (n_samples, n_classes) + Categorical distribution for each item. + + transduction_ : ndarray of shape (n_samples,) + Label assigned to each item during :term:`fit`. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Number of iterations run. """ _parameter_constraints: dict = { @@ -244,7 +272,7 @@ def fit(self, X, y): y : array-like of shape (n_samples,) Target class values with unlabeled points marked as -1. All unlabeled samples will be transductively assigned labels - internally. + internally, which are stored in :term:`transduction_`. 
Returns ------- @@ -371,7 +399,7 @@ class LabelPropagation(BaseLabelPropagation): Categorical distribution for each item. transduction_ : ndarray of shape (n_samples) - Label assigned to each item via the transduction. + Label assigned to each item during :term:`fit`. n_features_in_ : int Number of features seen during :term:`fit`. @@ -466,7 +494,7 @@ def fit(self, X, y): y : array-like of shape (n_samples,) Target class values with unlabeled points marked as -1. All unlabeled samples will be transductively assigned labels - internally. + internally, which are stored in :term:`transduction_`. Returns ------- @@ -531,7 +559,7 @@ class LabelSpreading(BaseLabelPropagation): Categorical distribution for each item. transduction_ : ndarray of shape (n_samples,) - Label assigned to each item via the transduction. + Label assigned to each item during :term:`fit`. n_features_in_ : int Number of features seen during :term:`fit`. From 56bab930dbefabb4698a4873f3594a33acb95d67 Mon Sep 17 00:00:00 2001 From: Scott Gigante <84813314+scottgigante-immunai@users.noreply.github.com> Date: Fri, 18 Nov 2022 11:09:37 -0500 Subject: [PATCH 2/5] Document lack of fit_predict --- sklearn/semi_supervised/_label_propagation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 43c62e9b26fab..f58dd81123824 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -212,6 +212,10 @@ def predict(self, X): y : ndarray of shape (n_samples,) Predictions for input data. """ + # Note: since `predict` does not accept semi-supervised labels as input, + # `fit(X, y).predict(X) != fit(X, y).transduction_`. + # Hence, `fit_predict` is not implemented. 
+ # See https://github.com/scikit-learn/scikit-learn/pull/24898 probas = self.predict_proba(X) return self.classes_[np.argmax(probas, axis=1)].ravel() From 087f7ea7b245271c169784b45b13058d4717e931 Mon Sep 17 00:00:00 2001 From: Scott Gigante <84813314+scottgigante-immunai@users.noreply.github.com> Date: Fri, 18 Nov 2022 11:24:21 -0500 Subject: [PATCH 3/5] `transduction_` is not a `:term:` --- sklearn/semi_supervised/_label_propagation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index f58dd81123824..71f3e07f95217 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -276,7 +276,7 @@ def fit(self, X, y): y : array-like of shape (n_samples,) Target class values with unlabeled points marked as -1. All unlabeled samples will be transductively assigned labels - internally, which are stored in :term:`transduction_`. + internally, which are stored in `transduction_`. Returns ------- @@ -498,7 +498,7 @@ def fit(self, X, y): y : array-like of shape (n_samples,) Target class values with unlabeled points marked as -1. All unlabeled samples will be transductively assigned labels - internally, which are stored in :term:`transduction_`. + internally, which are stored in `transduction_`. 
Returns ------- From 85343cbfea4a4ff2732d8f5d898782dd14c3c1af Mon Sep 17 00:00:00 2001 From: Scott Gigante <84813314+scottgigante-immunai@users.noreply.github.com> Date: Mon, 21 Nov 2022 14:28:04 -0500 Subject: [PATCH 4/5] Remove attributes from private class --- sklearn/semi_supervised/_label_propagation.py | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 71f3e07f95217..6817e7e411c9b 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -105,34 +105,6 @@ class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta): ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. - - Attributes - ---------- - X_ : ndarray of shape (n_samples, n_features) - Input array. - - classes_ : ndarray of shape (n_classes,) - The distinct labels used in classifying instances. - - label_distributions_ : ndarray of shape (n_samples, n_classes) - Categorical distribution for each item. - - transduction_ : ndarray of shape (n_samples,) - Label assigned to each item during :term:`fit`. - - n_features_in_ : int - Number of features seen during :term:`fit`. - - .. versionadded:: 0.24 - - feature_names_in_ : ndarray of shape (`n_features_in_`,) - Names of features seen during :term:`fit`. Defined only when `X` - has feature names that are all strings. - - .. versionadded:: 1.0 - - n_iter_ : int - Number of iterations run. 
""" _parameter_constraints: dict = { From 838060ac0ef21d4dfd4822a6f4ad68ce5a6c7931 Mon Sep 17 00:00:00 2001 From: Scott Gigante <84813314+scottgigante-immunai@users.noreply.github.com> Date: Mon, 21 Nov 2022 15:07:58 -0500 Subject: [PATCH 5/5] Trailing whitespace --- sklearn/semi_supervised/_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 6817e7e411c9b..d7463268c1c97 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -186,7 +186,7 @@ def predict(self, X): """ # Note: since `predict` does not accept semi-supervised labels as input, # `fit(X, y).predict(X) != fit(X, y).transduction_`. - # Hence, `fit_predict` is not implemented. + # Hence, `fit_predict` is not implemented. # See https://github.com/scikit-learn/scikit-learn/pull/24898 probas = self.predict_proba(X) return self.classes_[np.argmax(probas, axis=1)].ravel()