diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 4a9e92bf2f201..ba51e28229462 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -220,6 +220,16 @@ Changelog - |Fix| Fixed error message of :class:`metrics.coverage_error` for 1D array input. :pr:`23548` by :user:`Hao Chun Chang <haochunchang>`. +:mod:`sklearn.multioutput` +.......................... + +- |Feature| Added boolean `verbose` flag to classes: + :class:`multioutput.ClassifierChain` and :class:`multioutput.RegressorChain`. + :pr:`23977` by :user:`Eric Fiegel <efiegel>`, + :user:`Chiara Marmo <cmarmo>`, + :user:`Lucy Liu <lucyleeow>`, and + :user:`Guillaume Lemaitre <glemaitre>`. + :mod:`sklearn.naive_bayes` .......................... diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 13f95798a6b15..bbd6295bf810f 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -23,10 +23,14 @@ from .base import BaseEstimator, clone, MetaEstimatorMixin from .base import RegressorMixin, ClassifierMixin, is_classifier from .model_selection import cross_val_predict +from .utils import check_random_state, _print_elapsed_time from .utils.metaestimators import available_if -from .utils import check_random_state -from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params from .utils.multiclass import check_classification_targets +from .utils.validation import ( + check_is_fitted, + has_fit_parameter, + _check_fit_params, +) from .utils.fixes import delayed from .utils._param_validation import HasMethods @@ -538,11 +542,19 @@ def _check(self): class _BaseChain(BaseEstimator, metaclass=ABCMeta): - def __init__(self, base_estimator, *, order=None, cv=None, random_state=None): + def __init__( + self, base_estimator, *, order=None, cv=None, random_state=None, verbose=False + ): self.base_estimator = base_estimator self.order = order self.cv = cv self.random_state = random_state + self.verbose = verbose + + def _log_message(self, *, estimator_idx, n_estimators, processing_msg): + if not 
self.verbose: + return None + return f"({estimator_idx} of {n_estimators}) {processing_msg}" @abstractmethod def fit(self, X, Y, **fit_params): @@ -602,8 +614,14 @@ def fit(self, X, Y, **fit_params): del Y_pred_chain for chain_idx, estimator in enumerate(self.estimators_): + message = self._log_message( + estimator_idx=chain_idx + 1, + n_estimators=len(self.estimators_), + processing_msg=f"Processing order {self.order_[chain_idx]}", + ) y = Y[:, self.order_[chain_idx]] - estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params) + with _print_elapsed_time("Chain", message): + estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params) if self.cv is not None and chain_idx < len(self.estimators_) - 1: col_idx = X.shape[1] + chain_idx cv_result = cross_val_predict( @@ -702,6 +720,11 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`. + verbose : bool, default=False + If True, chain progress is output as each model is completed. + + .. versionadded:: 1.2 + Attributes ---------- classes_ : list @@ -903,6 +926,11 @@ class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain): Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`. + verbose : bool, default=False + If True, chain progress is output as each model is completed. + + .. 
versionadded:: 1.2 + Attributes ---------- estimators_ : list diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 25d209223acc1..ad95282fa6614 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -2,6 +2,7 @@ import numpy as np import scipy.sparse as sp from joblib import cpu_count +import re from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal @@ -10,6 +11,8 @@ from sklearn.base import clone from sklearn.datasets import make_classification from sklearn.datasets import load_linnerud +from sklearn.datasets import make_multilabel_classification +from sklearn.datasets import make_regression from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier from sklearn.exceptions import NotFittedError from sklearn.linear_model import Lasso @@ -18,15 +21,17 @@ from sklearn.linear_model import Ridge from sklearn.linear_model import SGDClassifier from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import LinearRegression from sklearn.metrics import jaccard_score, mean_squared_error from sklearn.multiclass import OneVsRestClassifier from sklearn.multioutput import ClassifierChain, RegressorChain from sklearn.multioutput import MultiOutputClassifier from sklearn.multioutput import MultiOutputRegressor from sklearn.svm import LinearSVC +from sklearn.tree import DecisionTreeClassifier from sklearn.base import ClassifierMixin from sklearn.utils import shuffle -from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.dummy import DummyRegressor, DummyClassifier from sklearn.pipeline import make_pipeline from sklearn.impute import SimpleImputer @@ -702,6 +707,47 @@ def test_classifier_chain_tuple_invalid_order(): chain.fit(X, y) +def test_classifier_chain_verbose(capsys): + X, y = make_multilabel_classification( + n_samples=100, n_features=5, 
n_classes=3, n_labels=3, random_state=0 + ) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + pattern = ( + r"\[Chain\].*\(1 of 3\) Processing order 0, total=.*\n" + r"\[Chain\].*\(2 of 3\) Processing order 1, total=.*\n" + r"\[Chain\].*\(3 of 3\) Processing order 2, total=.*\n$" + ) + + classifier = ClassifierChain( + DecisionTreeClassifier(), + order=[0, 1, 2], + random_state=0, + verbose=True, + ) + classifier.fit(X_train, y_train) + assert re.match(pattern, capsys.readouterr()[0]) + + +def test_regressor_chain_verbose(capsys): + X, y = make_regression(n_samples=125, n_targets=3, random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + pattern = ( + r"\[Chain\].*\(1 of 3\) Processing order 1, total=.*\n" + r"\[Chain\].*\(2 of 3\) Processing order 0, total=.*\n" + r"\[Chain\].*\(3 of 3\) Processing order 2, total=.*\n$" + ) + regressor = RegressorChain( + LinearRegression(), + order=[1, 0, 2], + random_state=0, + verbose=True, + ) + regressor.fit(X_train, y_train) + assert re.match(pattern, capsys.readouterr()[0]) + + def test_multioutputregressor_ducktypes_fitted_estimator(): """Test that MultiOutputRegressor checks the fitted estimator for predict. Non-regression test for #16549."""