From b45a370b8fb203f2b389f49ba8b693850ca3e4bd Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Nov 2023 13:20:09 +1100 Subject: [PATCH 01/15] add chain method --- doc/whats_new/v1.4.rst | 6 ++ sklearn/multioutput.py | 130 ++++++++++++++++++------------ sklearn/tests/test_multioutput.py | 74 +++++++++++++---- 3 files changed, 143 insertions(+), 67 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index ed7469e61567c..ae0804f04db85 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -395,6 +395,12 @@ Changelog object in the parameter grid if it's an estimator. :pr:`26786` by `Adrin Jalali`_. +:mod:`sklearn.multioutput` +.......................... + +- |Enhancement| `chain_method` parameter added to + `:class:``multioutput.ClassifierChain`. by :user:`Lucy Liu ` + :mod:`sklearn.neighbors` ........................ diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 5cf1eae96fd3b..ffe47cd9719b0 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -648,6 +648,42 @@ def _log_message(self, *, estimator_idx, n_estimators, processing_msg): return None return f"({estimator_idx} of {n_estimators}) {processing_msg}" + def _get_Y_output(self, X, *, output_method="predict"): + """Get predictions for each model in the chain.""" + check_is_fitted(self) + X = self._validate_data(X, accept_sparse=True, reset=False) + Y_output_chain = np.zeros((X.shape[0], len(self.estimators_))) + Y_feature_chain = np.zeros((X.shape[0], len(self.estimators_))) + + # Note `decision_function` output is 1d for binary targets + is_feature_multi = self.chain_method_ in ["predict_proba", "predict_log_proba"] + is_output_multi = output_method == "predict_proba" + + for chain_idx, estimator in enumerate(self.estimators_): + previous_predictions = Y_feature_chain[:, :chain_idx] + if sp.issparse(X): + X_aug = sp.hstack((X, previous_predictions)) + else: + X_aug = np.hstack((X, previous_predictions)) + + feature_func = getattr(estimator, 
self.chain_method_) + feature_predictions = feature_func(X_aug) + if is_feature_multi: + feature_predictions = feature_predictions[:, 1] + Y_feature_chain[:, chain_idx] = feature_predictions + + output_func = getattr(estimator, output_method) + output_predictions = output_func(X_aug) + if is_output_multi: + output_predictions = output_predictions[:, 1] + Y_output_chain[:, chain_idx] = output_predictions + + inv_order = np.empty_like(self.order_) + inv_order[self.order_] = np.arange(len(self.order_)) + Y_output = Y_output_chain[:, inv_order] + + return Y_output + @abstractmethod def fit(self, X, Y, **fit_params): """Fit the model to data matrix X and targets Y. @@ -710,6 +746,15 @@ def fit(self, X, Y, **fit_params): else: routed_params = Bunch(estimator=Bunch(fit=fit_params)) + try: + self.chain_method_ = self.chain_method + except AttributeError: + self.chain_method_ = "predict" + + # Allow for different chain methhods for `ClassifierChain`; + # proba methods produce 2d output (decision_function 1d for binary targets) + multi_output = self.chain_method_ in ["predict_proba", "predict_log_proba"] + for chain_idx, estimator in enumerate(self.estimators_): message = self._log_message( estimator_idx=chain_idx + 1, @@ -727,8 +772,10 @@ def fit(self, X, Y, **fit_params): if self.cv is not None and chain_idx < len(self.estimators_) - 1: col_idx = X.shape[1] + chain_idx cv_result = cross_val_predict( - self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv + self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv, method=self.chain_method_, ) + if multi_output: + cv_result = cv_result[:, -1] if sp.issparse(X_aug): X_aug[:, col_idx] = np.expand_dims(cv_result, 1) else: @@ -749,25 +796,7 @@ def predict(self, X): Y_pred : array-like of shape (n_samples, n_classes) The predicted values. 
""" - check_is_fitted(self) - X = self._validate_data(X, accept_sparse=True, reset=False) - Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) - for chain_idx, estimator in enumerate(self.estimators_): - previous_predictions = Y_pred_chain[:, :chain_idx] - if sp.issparse(X): - if chain_idx == 0: - X_aug = X - else: - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) - Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) - - inv_order = np.empty_like(self.order_) - inv_order[self.order_] = np.arange(len(self.order_)) - Y_pred = Y_pred_chain[:, inv_order] - - return Y_pred + return self._get_Y_output(X) class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): @@ -818,6 +847,14 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. + chain_method : {'predict', 'predict_proba', 'predict_log_proba', \ + 'decision_function'}, default='predict' + + Prediction method to be used by estimators in the chain for + the 'prediction' features of previous estimators in the chain. + + .. versionadded:: 1.4 + random_state : int, RandomState instance or None, optional (default=None) If ``order='random'``, determines random number generation for the chain order. @@ -844,6 +881,9 @@ class labels for each estimator in the chain. order_ : list The order of labels in the classifier chain. + chain_method_ : str + Prediction method to be used by estimators in the chain. + n_features_in_ : int Number of features seen during :term:`fit`. Only defined if the underlying `base_estimator` exposes such an attribute when fit. @@ -891,6 +931,21 @@ class labels for each estimator in the chain. 
[0.0321..., 0.9935..., 0.0626...]]) """ + _parameter_constraints: dict = { + **_BaseChain._parameter_constraints, + "chain_method": [StrOptions({"predict", "predict_proba", "predict_log_proba", "decision_function"})], + } + + def __init__(self, base_estimator, *, order=None, cv=None, chain_method='predict', random_state=None, verbose=False): + super().__init__( + base_estimator, + order=order, + cv=cv, + random_state=random_state, + verbose=verbose, + ) + self.chain_method = chain_method + @_fit_context( # ClassifierChain.base_estimator is not validated yet prefer_skip_nested_validation=False @@ -941,22 +996,7 @@ def predict_proba(self, X): Y_prob : array-like of shape (n_samples, n_classes) The predicted probabilities. """ - X = self._validate_data(X, accept_sparse=True, reset=False) - Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_))) - Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) - for chain_idx, estimator in enumerate(self.estimators_): - previous_predictions = Y_pred_chain[:, :chain_idx] - if sp.issparse(X): - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) - Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1] - Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) - inv_order = np.empty_like(self.order_) - inv_order[self.order_] = np.arange(len(self.order_)) - Y_prob = Y_prob_chain[:, inv_order] - - return Y_prob + return self._get_Y_output(X, output_method="predict_proba") @_available_if_base_estimator_has("decision_function") def decision_function(self, X): @@ -973,23 +1013,7 @@ def decision_function(self, X): Returns the decision function of the sample for each model in the chain. 
""" - X = self._validate_data(X, accept_sparse=True, reset=False) - Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_))) - Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) - for chain_idx, estimator in enumerate(self.estimators_): - previous_predictions = Y_pred_chain[:, :chain_idx] - if sp.issparse(X): - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) - Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug) - Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) - - inv_order = np.empty_like(self.order_) - inv_order[self.order_] = np.arange(len(self.order_)) - Y_decision = Y_decision_chain[:, inv_order] - - return Y_decision + return self._get_Y_output(X, output_method="decision_function") def get_metadata_routing(self): """Get metadata routing of this object. diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 493d0fc7dc8b5..58a3a90bf7888 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -492,10 +492,11 @@ def generate_multilabel_dataset_with_correlations(): return X, Y_multi -def test_classifier_chain_fit_and_predict_with_linear_svc(): +@pytest.mark.parametrize("chain_method", ["predict", "decision_function"]) +def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() - classifier_chain = ClassifierChain(LinearSVC(dual="auto")) + classifier_chain = ClassifierChain(LinearSVC(dual="auto"), chain_method=chain_method) classifier_chain.fit(X, Y) Y_pred = classifier_chain.predict(X) @@ -548,23 +549,35 @@ def test_classifier_chain_vs_independent_models(): ) -def test_base_chain_fit_and_predict(): - # Fit base chain and verify predict performance +@pytest.mark.parametrize("chain_method", ["predict", "predict_proba", "predict_log_proba", "decision_function"]) +def 
test_classifier_chain_fit_and_predict(chain_method): + # Fit classifier chain and verify predict performance X, Y = generate_multilabel_dataset_with_correlations() - chains = [RegressorChain(Ridge()), ClassifierChain(LogisticRegression())] - for chain in chains: - chain.fit(X, Y) - Y_pred = chain.predict(X) - assert Y_pred.shape == Y.shape - assert [c.coef_.size for c in chain.estimators_] == list( - range(X.shape[1], X.shape[1] + Y.shape[1]) - ) + chain = ClassifierChain(LogisticRegression(), chain_method=chain_method) + chain.fit(X, Y) + Y_pred = chain.predict(X) + assert Y_pred.shape == Y.shape + assert [c.coef_.size for c in chain.estimators_] == list( + range(X.shape[1], X.shape[1] + Y.shape[1]) + ) - Y_prob = chains[1].predict_proba(X) + Y_prob = chain.predict_proba(X) Y_binary = Y_prob >= 0.5 assert_array_equal(Y_binary, Y_pred) - assert isinstance(chains[1], ClassifierMixin) + assert isinstance(chain, ClassifierMixin) + + +def test_regressor_chain_fit_and_predict(): + # Fit regressor chain and verify Y and estimator coefficients shape + X, Y = generate_multilabel_dataset_with_correlations() + chain = RegressorChain(Ridge()) + chain.fit(X, Y) + Y_pred = chain.predict(X) + assert Y_pred.shape == Y.shape + assert [c.coef_.size for c in chain.estimators_] == list( + range(X.shape[1], X.shape[1] + Y.shape[1]) + ) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) @@ -620,6 +633,39 @@ def test_base_chain_crossval_fit_and_predict(): assert mean_squared_error(Y, Y_pred_cv) < 0.25 +@pytest.mark.parametrize( + "chain_type, chain_method", + [ + ("classifier", "predict"), + ("classifier", "predict_proba"), + ("classifier", "predict_log_proba"), + ("classifier", "decision_function"), + ("regressor", ""), + ] +) +def test_base_chain_crossval_fit_and_predict(chain_type, chain_method): + # Fit chain with cross_val_predict and verify predict + # performance + X, Y = generate_multilabel_dataset_with_correlations() + + if chain_type == "classifier": + chain = 
ClassifierChain(LogisticRegression(), chain_method=chain_method) + else: + chain = RegressorChain(Ridge()) + chain.fit(X, Y) + chain_cv = clone(chain).set_params(cv=3) + chain_cv.fit(X, Y) + Y_pred_cv = chain_cv.predict(X) + Y_pred = chain.predict(X) + + assert Y_pred_cv.shape == Y_pred.shape + assert not np.all(Y_pred == Y_pred_cv) + if isinstance(chain, ClassifierChain): + assert jaccard_score(Y, Y_pred_cv, average="samples") > 0.4 + else: + assert mean_squared_error(Y, Y_pred_cv) < 0.25 + + @pytest.mark.parametrize( "estimator", [ From e4753e93122d723c4995d526b5c4d6ab3707c360 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Nov 2023 13:20:47 +1100 Subject: [PATCH 02/15] black --- sklearn/multioutput.py | 23 ++++++++++++++++++++--- sklearn/tests/test_multioutput.py | 25 +++++++++++++++---------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index ffe47cd9719b0..3a987b86f1a17 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -772,7 +772,11 @@ def fit(self, X, Y, **fit_params): if self.cv is not None and chain_idx < len(self.estimators_) - 1: col_idx = X.shape[1] + chain_idx cv_result = cross_val_predict( - self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv, method=self.chain_method_, + self.base_estimator, + X_aug[:, :col_idx], + y=y, + cv=self.cv, + method=self.chain_method_, ) if multi_output: cv_result = cv_result[:, -1] @@ -933,10 +937,23 @@ class labels for each estimator in the chain. 
_parameter_constraints: dict = { **_BaseChain._parameter_constraints, - "chain_method": [StrOptions({"predict", "predict_proba", "predict_log_proba", "decision_function"})], + "chain_method": [ + StrOptions( + {"predict", "predict_proba", "predict_log_proba", "decision_function"} + ) + ], } - def __init__(self, base_estimator, *, order=None, cv=None, chain_method='predict', random_state=None, verbose=False): + def __init__( + self, + base_estimator, + *, + order=None, + cv=None, + chain_method="predict", + random_state=None, + verbose=False, + ): super().__init__( base_estimator, order=order, diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 58a3a90bf7888..17f56a225fb18 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -496,7 +496,9 @@ def generate_multilabel_dataset_with_correlations(): def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() - classifier_chain = ClassifierChain(LinearSVC(dual="auto"), chain_method=chain_method) + classifier_chain = ClassifierChain( + LinearSVC(dual="auto"), chain_method=chain_method + ) classifier_chain.fit(X, Y) Y_pred = classifier_chain.predict(X) @@ -549,7 +551,10 @@ def test_classifier_chain_vs_independent_models(): ) -@pytest.mark.parametrize("chain_method", ["predict", "predict_proba", "predict_log_proba", "decision_function"]) +@pytest.mark.parametrize( + "chain_method", + ["predict", "predict_proba", "predict_log_proba", "decision_function"], +) def test_classifier_chain_fit_and_predict(chain_method): # Fit classifier chain and verify predict performance X, Y = generate_multilabel_dataset_with_correlations() @@ -634,14 +639,14 @@ def test_base_chain_crossval_fit_and_predict(): @pytest.mark.parametrize( - "chain_type, chain_method", - [ - ("classifier", "predict"), - ("classifier", "predict_proba"), - 
("classifier", "predict_log_proba"), - ("classifier", "decision_function"), - ("regressor", ""), - ] + "chain_type, chain_method", + [ + ("classifier", "predict"), + ("classifier", "predict_proba"), + ("classifier", "predict_log_proba"), + ("classifier", "decision_function"), + ("regressor", ""), + ], ) def test_base_chain_crossval_fit_and_predict(chain_type, chain_method): # Fit chain with cross_val_predict and verify predict From 347dcbbdca609037fa449d33ee1739e584bcd949 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Nov 2023 13:28:54 +1100 Subject: [PATCH 03/15] whats new --- doc/whats_new/v1.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index ae0804f04db85..1dc85aa854e60 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -399,7 +399,7 @@ Changelog .......................... - |Enhancement| `chain_method` parameter added to - `:class:``multioutput.ClassifierChain`. by :user:`Lucy Liu ` + `:class:``multioutput.ClassifierChain`. :pr:`27700` by :user:`Lucy Liu ` :mod:`sklearn.neighbors` ........................ 
From d5752bf08538ab5210794cd073294e48cf5c79cf Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Nov 2023 13:29:54 +1100 Subject: [PATCH 04/15] rm duplicated test --- sklearn/tests/test_multioutput.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 17f56a225fb18..d2422ea868009 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -618,26 +618,6 @@ def test_base_chain_random_order(): assert_array_almost_equal(est1.coef_, est2.coef_) -def test_base_chain_crossval_fit_and_predict(): - # Fit chain with cross_val_predict and verify predict - # performance - X, Y = generate_multilabel_dataset_with_correlations() - - for chain in [ClassifierChain(LogisticRegression()), RegressorChain(Ridge())]: - chain.fit(X, Y) - chain_cv = clone(chain).set_params(cv=3) - chain_cv.fit(X, Y) - Y_pred_cv = chain_cv.predict(X) - Y_pred = chain.predict(X) - - assert Y_pred_cv.shape == Y_pred.shape - assert not np.all(Y_pred == Y_pred_cv) - if isinstance(chain, ClassifierChain): - assert jaccard_score(Y, Y_pred_cv, average="samples") > 0.4 - else: - assert mean_squared_error(Y, Y_pred_cv) < 0.25 - - @pytest.mark.parametrize( "chain_type, chain_method", [ From 4dde42c3a641111e3650d6bd36377b46010ff0ed Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Nov 2023 14:59:50 +1100 Subject: [PATCH 05/15] fix, rm chain_method param from regressorchain --- sklearn/multioutput.py | 34 ++++++++++++++++++------------- sklearn/tests/test_multioutput.py | 2 +- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 3a987b86f1a17..a137846ed68db 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -648,6 +648,14 @@ def _log_message(self, *, estimator_idx, n_estimators, processing_msg): return None return f"({estimator_idx} of {n_estimators}) {processing_msg}" + def _get_chain_method(self): + # 
Different chain methods allowed in `ClassifierChain` only + try: + chain_method = self.chain_method_ + except AttributeError: + chain_method = "predict" + return chain_method + def _get_Y_output(self, X, *, output_method="predict"): """Get predictions for each model in the chain.""" check_is_fitted(self) @@ -655,8 +663,9 @@ def _get_Y_output(self, X, *, output_method="predict"): Y_output_chain = np.zeros((X.shape[0], len(self.estimators_))) Y_feature_chain = np.zeros((X.shape[0], len(self.estimators_))) - # Note `decision_function` output is 1d for binary targets - is_feature_multi = self.chain_method_ in ["predict_proba", "predict_log_proba"] + chain_method = self._get_chain_method() + # proba methods produce 2d output (decision_function 1d for binary targets) + is_feature_multi = chain_method in ["predict_proba", "predict_log_proba"] is_output_multi = output_method == "predict_proba" for chain_idx, estimator in enumerate(self.estimators_): @@ -666,7 +675,7 @@ def _get_Y_output(self, X, *, output_method="predict"): else: X_aug = np.hstack((X, previous_predictions)) - feature_func = getattr(estimator, self.chain_method_) + feature_func = getattr(estimator, chain_method) feature_predictions = feature_func(X_aug) if is_feature_multi: feature_predictions = feature_predictions[:, 1] @@ -746,14 +755,9 @@ def fit(self, X, Y, **fit_params): else: routed_params = Bunch(estimator=Bunch(fit=fit_params)) - try: - self.chain_method_ = self.chain_method - except AttributeError: - self.chain_method_ = "predict" - - # Allow for different chain methhods for `ClassifierChain`; + chain_method = self._get_chain_method() # proba methods produce 2d output (decision_function 1d for binary targets) - multi_output = self.chain_method_ in ["predict_proba", "predict_log_proba"] + is_multi = chain_method in ["predict_proba", "predict_log_proba"] for chain_idx, estimator in enumerate(self.estimators_): message = self._log_message( @@ -776,10 +780,10 @@ def fit(self, X, Y, **fit_params): 
X_aug[:, :col_idx], y=y, cv=self.cv, - method=self.chain_method_, + method=chain_method, ) - if multi_output: - cv_result = cv_result[:, -1] + if is_multi: + cv_result = cv_result[:, 1] if sp.issparse(X_aug): X_aug[:, col_idx] = np.expand_dims(cv_result, 1) else: @@ -886,7 +890,8 @@ class labels for each estimator in the chain. The order of labels in the classifier chain. chain_method_ : str - Prediction method to be used by estimators in the chain. + Prediction method to be used by estimators in the chain for the 'prediction' + features. n_features_in_ : int Number of features seen during :term:`fit`. Only defined if the @@ -993,6 +998,7 @@ def fit(self, X, Y, **fit_params): """ _raise_for_params(fit_params, self, "fit") + self.chain_method_ = self.chain_method super().fit(X, Y, **fit_params) self.classes_ = [ estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index d2422ea868009..fc620139c53f1 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -497,7 +497,7 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() classifier_chain = ClassifierChain( - LinearSVC(dual="auto"), chain_method=chain_method + LinearSVC(dual="auto"), chain_method=chain_method, ) classifier_chain.fit(X, Y) From 8aab5d6579f8ff1d0714cb476abe461f9b3dafa2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Nov 2023 15:00:17 +1100 Subject: [PATCH 06/15] black --- sklearn/tests/test_multioutput.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index fc620139c53f1..491021c8a90bd 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -497,7 +497,8 @@ def 
test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() classifier_chain = ClassifierChain( - LinearSVC(dual="auto"), chain_method=chain_method, + LinearSVC(dual="auto"), + chain_method=chain_method, ) classifier_chain.fit(X, Y) From 9e50488250a20b4d0d1753da94e29522d8814fd5 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 2 Nov 2023 16:02:16 +1100 Subject: [PATCH 07/15] use _get_response_values --- sklearn/multioutput.py | 58 ++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index a137846ed68db..523c0c509e031 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -33,6 +33,8 @@ from .model_selection import cross_val_predict from .utils import Bunch, _print_elapsed_time, check_random_state from .utils._param_validation import HasMethods, StrOptions +from .utils._response import _get_response_values +from .utils.validation import _check_response_method from .utils.metadata_routing import ( MetadataRouter, MethodMapping, @@ -649,14 +651,14 @@ def _log_message(self, *, estimator_idx, n_estimators, processing_msg): return f"({estimator_idx} of {n_estimators}) {processing_msg}" def _get_chain_method(self): - # Different chain methods allowed in `ClassifierChain` only try: - chain_method = self.chain_method_ + chain_method = self.chain_method + # `RegressorChain` does not have a `chain_method` parameter except AttributeError: chain_method = "predict" - return chain_method + return _check_response_method(self.base_estimator, chain_method) - def _get_Y_output(self, X, *, output_method="predict"): + def _get_predictions(self, X, *, output_method): """Get predictions for each model in the chain.""" check_is_fitted(self) X = self._validate_data(X, accept_sparse=True, reset=False) @@ -664,10 +666,6 @@ def _get_Y_output(self, 
X, *, output_method="predict"): Y_feature_chain = np.zeros((X.shape[0], len(self.estimators_))) chain_method = self._get_chain_method() - # proba methods produce 2d output (decision_function 1d for binary targets) - is_feature_multi = chain_method in ["predict_proba", "predict_log_proba"] - is_output_multi = output_method == "predict_proba" - for chain_idx, estimator in enumerate(self.estimators_): previous_predictions = Y_feature_chain[:, :chain_idx] if sp.issparse(X): @@ -675,16 +673,18 @@ def _get_Y_output(self, X, *, output_method="predict"): else: X_aug = np.hstack((X, previous_predictions)) - feature_func = getattr(estimator, chain_method) - feature_predictions = feature_func(X_aug) - if is_feature_multi: - feature_predictions = feature_predictions[:, 1] + feature_predictions, _ = _get_response_values( + estimator, + X_aug, + response_method=chain_method, + ) Y_feature_chain[:, chain_idx] = feature_predictions - output_func = getattr(estimator, output_method) - output_predictions = output_func(X_aug) - if is_output_multi: - output_predictions = output_predictions[:, 1] + output_predictions, _ = _get_response_values( + estimator, + X_aug, + response_method=output_method, + ) Y_output_chain[:, chain_idx] = output_predictions inv_order = np.empty_like(self.order_) @@ -756,9 +756,6 @@ def fit(self, X, Y, **fit_params): routed_params = Bunch(estimator=Bunch(fit=fit_params)) chain_method = self._get_chain_method() - # proba methods produce 2d output (decision_function 1d for binary targets) - is_multi = chain_method in ["predict_proba", "predict_log_proba"] - for chain_idx, estimator in enumerate(self.estimators_): message = self._log_message( estimator_idx=chain_idx + 1, @@ -782,7 +779,7 @@ def fit(self, X, Y, **fit_params): cv=self.cv, method=chain_method, ) - if is_multi: + if cv_result.ndim > 1: cv_result = cv_result[:, 1] if sp.issparse(X_aug): X_aug[:, col_idx] = np.expand_dims(cv_result, 1) @@ -804,7 +801,7 @@ def predict(self, X): Y_pred : array-like of 
shape (n_samples, n_classes) The predicted values. """ - return self._get_Y_output(X) + return self._get_predictions(X) class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): @@ -856,11 +853,16 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): - An iterable yielding (train, test) splits as arrays of indices. chain_method : {'predict', 'predict_proba', 'predict_log_proba', \ - 'decision_function'}, default='predict' + 'decision_function'} or list of such str's, default='predict' Prediction method to be used by estimators in the chain for the 'prediction' features of previous estimators in the chain. + - if `str`, name of the method; + - if a list of `str`, provides the method names in order of + preference. The method used corresponds to the first method in + the list that is implemented by `base_estimator`. + .. versionadded:: 1.4 random_state : int, RandomState instance or None, optional (default=None) @@ -890,7 +892,7 @@ class labels for each estimator in the chain. The order of labels in the classifier chain. chain_method_ : str - Prediction method to be used by estimators in the chain for the 'prediction' + Prediction method used by estimators in the chain for the 'prediction' features. n_features_in_ : int @@ -943,9 +945,11 @@ class labels for each estimator in the chain. 
_parameter_constraints: dict = { **_BaseChain._parameter_constraints, "chain_method": [ + list, + tuple, StrOptions( {"predict", "predict_proba", "predict_log_proba", "decision_function"} - ) + ), ], } @@ -998,11 +1002,11 @@ def fit(self, X, Y, **fit_params): """ _raise_for_params(fit_params, self, "fit") - self.chain_method_ = self.chain_method super().fit(X, Y, **fit_params) self.classes_ = [ estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_) ] + self.chain_method_ = self._get_chain_method() return self @_available_if_base_estimator_has("predict_proba") @@ -1019,7 +1023,7 @@ def predict_proba(self, X): Y_prob : array-like of shape (n_samples, n_classes) The predicted probabilities. """ - return self._get_Y_output(X, output_method="predict_proba") + return self._get_predictions(X, output_method="predict_proba") @_available_if_base_estimator_has("decision_function") def decision_function(self, X): @@ -1036,7 +1040,7 @@ def decision_function(self, X): Returns the decision function of the sample for each model in the chain. """ - return self._get_Y_output(X, output_method="decision_function") + return self._get_predictions(X, output_method="decision_function") def get_metadata_routing(self): """Get metadata routing of this object. 
From 7ca530e0da576996ca6e8d3b6c5c52ba30860356 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 2 Nov 2023 16:12:25 +1100 Subject: [PATCH 08/15] fix-give output_method --- sklearn/multioutput.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 14c8e825b1ff7..76d1a0da6d27c 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -656,7 +656,8 @@ def _get_chain_method(self): # `RegressorChain` does not have a `chain_method` parameter except AttributeError: chain_method = "predict" - return _check_response_method(self.base_estimator, chain_method) + method = _check_response_method(self.base_estimator, chain_method) + return method.__name__ def _get_predictions(self, X, *, output_method): """Get predictions for each model in the chain.""" @@ -801,7 +802,7 @@ def predict(self, X): Y_pred : array-like of shape (n_samples, n_classes) The predicted values. """ - return self._get_predictions(X) + return self._get_predictions(X, output_method="predict") class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): From 6d1eb7d6a1c4f9239ea7deee3277d564f7161df4 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 3 Nov 2023 10:15:47 +1100 Subject: [PATCH 09/15] fix fit chain_method_ attr --- sklearn/multioutput.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 76d1a0da6d27c..912c59cced7bd 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -650,15 +650,6 @@ def _log_message(self, *, estimator_idx, n_estimators, processing_msg): return None return f"({estimator_idx} of {n_estimators}) {processing_msg}" - def _get_chain_method(self): - try: - chain_method = self.chain_method - # `RegressorChain` does not have a `chain_method` parameter - except AttributeError: - chain_method = "predict" - method = _check_response_method(self.base_estimator, chain_method) - return 
method.__name__ - def _get_predictions(self, X, *, output_method): """Get predictions for each model in the chain.""" check_is_fitted(self) @@ -666,7 +657,9 @@ def _get_predictions(self, X, *, output_method): Y_output_chain = np.zeros((X.shape[0], len(self.estimators_))) Y_feature_chain = np.zeros((X.shape[0], len(self.estimators_))) - chain_method = self._get_chain_method() + # `RegressorChain` does not have a `chain_method_` attribute so we + # default to "predict" + chain_method = getattr(self, "chain_method_", "predict") for chain_idx, estimator in enumerate(self.estimators_): previous_predictions = Y_feature_chain[:, :chain_idx] if sp.issparse(X): @@ -756,7 +749,15 @@ def fit(self, X, Y, **fit_params): else: routed_params = Bunch(estimator=Bunch(fit=fit_params)) - chain_method = self._get_chain_method() + if hasattr(self, "chain_method"): + chain_method = _check_response_method( + self.base_estimator, self.chain_method, + ).__name__ + self.chain_method_ = chain_method + else: + # `RegressorChain` does not have a `chain_method` parameter + chain_method = "predict" + for chain_idx, estimator in enumerate(self.estimators_): message = self._log_message( estimator_idx=chain_idx + 1, @@ -1005,7 +1006,6 @@ def fit(self, X, Y, **fit_params): super().fit(X, Y, **fit_params) self.classes_ = [estimator.classes_ for estimator in self.estimators_] - self.chain_method_ = self._get_chain_method() return self @_available_if_base_estimator_has("predict_proba") From dbe7ef27e1867d93edd1b0668d91c66b28772cc4 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 3 Nov 2023 10:16:06 +1100 Subject: [PATCH 10/15] black --- sklearn/multioutput.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 912c59cced7bd..ca234ae054cb9 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -751,7 +751,8 @@ def fit(self, X, Y, **fit_params): if hasattr(self, "chain_method"): chain_method = _check_response_method( - 
self.base_estimator, self.chain_method, + self.base_estimator, + self.chain_method, ).__name__ self.chain_method_ = chain_method else: From 41414511514fadb95e6816546371cf4ff2edc00d Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Mon, 4 Dec 2023 21:18:31 +1100 Subject: [PATCH 11/15] lint --- sklearn/multioutput.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index ca234ae054cb9..b07e0d09ceceb 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -34,7 +34,6 @@ from .utils import Bunch, _print_elapsed_time, check_random_state from .utils._param_validation import HasMethods, StrOptions from .utils._response import _get_response_values -from .utils.validation import _check_response_method from .utils.metadata_routing import ( MetadataRouter, MethodMapping, @@ -45,7 +44,12 @@ from .utils.metaestimators import available_if from .utils.multiclass import check_classification_targets from .utils.parallel import Parallel, delayed -from .utils.validation import _check_method_params, check_is_fitted, has_fit_parameter +from .utils.validation import ( + _check_method_params, + _check_response_method, + check_is_fitted, + has_fit_parameter, +) __all__ = [ "MultiOutputRegressor", From c554ac52bf6960133e299ea5d53147374f86a560 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 5 Dec 2023 17:11:00 +1100 Subject: [PATCH 12/15] review formatting --- doc/whats_new/v1.4.rst | 4 ++-- sklearn/multioutput.py | 15 +++++---------- sklearn/tests/test_multioutput.py | 12 ++++-------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 00b0be9917961..a46b43bd46e33 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -532,8 +532,8 @@ Changelog :mod:`sklearn.multioutput` .......................... -- |Enhancement| `chain_method` parameter added to - `:class:``multioutput.ClassifierChain`. 
:pr:`27700` by :user:`Lucy Liu ` +- |Enhancement| `chain_method` parameter added to `:class:``multioutput.ClassifierChain`. + :pr:`27700` by :user:`Lucy Liu `. :mod:`sklearn.neighbors` ........................ diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index b07e0d09ceceb..528751011654a 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -672,16 +672,12 @@ def _get_predictions(self, X, *, output_method): X_aug = np.hstack((X, previous_predictions)) feature_predictions, _ = _get_response_values( - estimator, - X_aug, - response_method=chain_method, + estimator, X_aug, response_method=chain_method, ) Y_feature_chain[:, chain_idx] = feature_predictions output_predictions, _ = _get_response_values( - estimator, - X_aug, - response_method=output_method, + estimator, X_aug, response_method=output_method, ) Y_output_chain[:, chain_idx] = output_predictions @@ -755,8 +751,7 @@ def fit(self, X, Y, **fit_params): if hasattr(self, "chain_method"): chain_method = _check_response_method( - self.base_estimator, - self.chain_method, + self.base_estimator, self.chain_method, ).__name__ self.chain_method_ = chain_method else: @@ -860,7 +855,7 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): - An iterable yielding (train, test) splits as arrays of indices. chain_method : {'predict', 'predict_proba', 'predict_log_proba', \ - 'decision_function'} or list of such str's, default='predict' + 'decision_function'} or list of such str's, default='predict' Prediction method to be used by estimators in the chain for the 'prediction' features of previous estimators in the chain. @@ -899,7 +894,7 @@ class labels for each estimator in the chain. The order of labels in the classifier chain. chain_method_ : str - Prediction method used by estimators in the chain for the 'prediction' + Prediction method used by estimators in the chain for the prediction features. 
n_features_in_ : int diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 491021c8a90bd..d16d29d460776 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -497,10 +497,8 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() classifier_chain = ClassifierChain( - LinearSVC(dual="auto"), - chain_method=chain_method, - ) - classifier_chain.fit(X, Y) + LinearSVC(dual="auto"), chain_method=chain_method, + ).fit(X, Y) Y_pred = classifier_chain.predict(X) assert Y_pred.shape == Y.shape @@ -518,12 +516,10 @@ def test_classifier_chain_fit_and_predict_with_sparse_data(csr_container): X, Y = generate_multilabel_dataset_with_correlations() X_sparse = csr_container(X) - classifier_chain = ClassifierChain(LogisticRegression()) - classifier_chain.fit(X_sparse, Y) + classifier_chain = ClassifierChain(LogisticRegression()).fit(X_sparse, Y) Y_pred_sparse = classifier_chain.predict(X_sparse) - classifier_chain = ClassifierChain(LogisticRegression()) - classifier_chain.fit(X, Y) + classifier_chain = ClassifierChain(LogisticRegression()).fit(X, Y) Y_pred_dense = classifier_chain.predict(X) assert_array_equal(Y_pred_sparse, Y_pred_dense) From aa09f21f4043fe88a37bd0ebf3389c9bdcb2ea71 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 5 Dec 2023 17:18:22 +1100 Subject: [PATCH 13/15] review comment, hstack --- sklearn/multioutput.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 528751011654a..854c4343cca8b 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -664,12 +664,10 @@ def _get_predictions(self, X, *, output_method): # `RegressorChain` does not have a `chain_method_` parameter so we # default to "predict" chain_method = getattr(self, "chain_method_", "predict") + hstack = 
sp.hstack if sp.issparse(X) else np.hstack for chain_idx, estimator in enumerate(self.estimators_): previous_predictions = Y_feature_chain[:, :chain_idx] - if sp.issparse(X): - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) + X_aug = hstack((X, previous_predictions)) feature_predictions, _ = _get_response_values( estimator, X_aug, response_method=chain_method, @@ -781,6 +779,7 @@ def fit(self, X, Y, **fit_params): cv=self.cv, method=chain_method, ) + # `predict_proba` output is 2D, we use only output for classes[-1] if cv_result.ndim > 1: cv_result = cv_result[:, 1] if sp.issparse(X_aug): From 443bcdbd76d6449a8d33832b409177460c8c3d13 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 5 Dec 2023 20:38:10 +1100 Subject: [PATCH 14/15] black --- sklearn/multioutput.py | 11 ++++++++--- sklearn/tests/test_multioutput.py | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 854c4343cca8b..3ee89d40058c1 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -670,12 +670,16 @@ def _get_predictions(self, X, *, output_method): X_aug = hstack((X, previous_predictions)) feature_predictions, _ = _get_response_values( - estimator, X_aug, response_method=chain_method, + estimator, + X_aug, + response_method=chain_method, ) Y_feature_chain[:, chain_idx] = feature_predictions output_predictions, _ = _get_response_values( - estimator, X_aug, response_method=output_method, + estimator, + X_aug, + response_method=output_method, ) Y_output_chain[:, chain_idx] = output_predictions @@ -749,7 +753,8 @@ def fit(self, X, Y, **fit_params): if hasattr(self, "chain_method"): chain_method = _check_response_method( - self.base_estimator, self.chain_method, + self.base_estimator, + self.chain_method, ).__name__ self.chain_method_ = chain_method else: diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 
d16d29d460776..7ad1c797af257 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -497,7 +497,8 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() classifier_chain = ClassifierChain( - LinearSVC(dual="auto"), chain_method=chain_method, + LinearSVC(dual="auto"), + chain_method=chain_method, ).fit(X, Y) Y_pred = classifier_chain.predict(X) From 969d8892a6e78ae91c028374f3479d0c94a9cb86 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 23 Feb 2024 11:49:36 +1100 Subject: [PATCH 15/15] update v --- doc/whats_new/v1.4.rst | 6 ------ doc/whats_new/v1.5.rst | 6 ++++++ sklearn/multioutput.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 76737a1722e4a..7092c53da1a27 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -865,12 +865,6 @@ Changelog - |Enhancement| Add method `predict_log_proba` to :class:`multioutput.ClassifierChain`. :pr:`27720` by :user:`Guillaume Lemaitre `. -:mod:`sklearn.multioutput` -.......................... - -- |Enhancement| `chain_method` parameter added to `:class:``multioutput.ClassifierChain`. - :pr:`27700` by :user:`Lucy Liu `. - :mod:`sklearn.neighbors` ........................ diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 53f0fbd8a74e8..d70c9cc2f1f23 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -158,6 +158,12 @@ Changelog - |Enhancement| :term:`CV splitters ` that ignores the group parameter now raises a warning when groups are passed in to :term:`split`. :pr:`28210` by +:mod:`sklearn.multioutput` +.......................... + +- |Enhancement| `chain_method` parameter added to :class:`multioutput.ClassifierChain`. + :pr:`27700` by :user:`Lucy Liu `. + :mod:`sklearn.pipeline` ....................... 
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index f593497336da9..64649007d6f24 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -871,7 +871,7 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): preference. The method used corresponds to the first method in the list that is implemented by `base_estimator`. - .. versionadded:: 1.4 + .. versionadded:: 1.5 random_state : int, RandomState instance or None, optional (default=None) If ``order='random'``, determines random number generation for the