Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
864d939
added preliminary verbose output for classifier chains
efiegel Nov 29, 2020
54af621
added assertion for multilabel Y. changed verbose text slightly.
efiegel Nov 30, 2020
7a7c1b9
added test for proper multilabel Y format where Y must be of shape (n…
efiegel Nov 30, 2020
a7b09a2
updated verbose output to indicate which feature is being added to th…
efiegel Nov 30, 2020
11ce6d8
added verbose tests
efiegel Dec 1, 2020
77bd771
added documentation for new parameters for verbose
efiegel Dec 1, 2020
4df3593
fixed linting errors
efiegel Dec 1, 2020
324bc00
fixed linting errors. undid changes to gitignore
efiegel Dec 1, 2020
b95f65c
fixed linting errors. undid changes to gitignore.
efiegel Dec 1, 2020
42d86a1
Merge branch 'verbose-to-classifier-regressor-chains' of github.com:e…
efiegel Dec 2, 2020
35d4694
fixed chain tests to specify order as keyword arg. fixes future warni…
efiegel Dec 2, 2020
8af0a54
Apply suggestions from code review
efiegel Dec 19, 2020
daba9c6
removed raise value error for improper y shape with multilabel fit
efiegel Dec 19, 2020
8b008e8
removed unused import statement that was failing lint
efiegel Dec 19, 2020
f5de4d6
parameterized chain tests
efiegel Dec 19, 2020
b4d20cc
bringing in changes to master. needed to get 1.0 what's new
efiegel Dec 19, 2020
7071080
added section to whats_new
efiegel Dec 19, 2020
a98d886
linting fix
efiegel Dec 19, 2020
839d240
Merge branch 'main' into verbose-to-classifier-regressor-chains
efiegel Jan 24, 2021
26b1124
merge main
lucyleeow Jul 22, 2022
aff4b91
update whats new
lucyleeow Jul 22, 2022
72f594b
black
lucyleeow Jul 22, 2022
f729d0a
update whats new
lucyleeow Jul 22, 2022
f1209fe
review
lucyleeow Jul 25, 2022
df123ee
amend msg
lucyleeow Jul 26, 2022
4de9b7b
rm deprecate arg
lucyleeow Jul 27, 2022
75029d5
unused import
lucyleeow Jul 28, 2022
f380d25
Merge branch 'main' into verbose-to-classifier-regressor-chains
jjerphan Jul 28, 2022
94cc9b3
what's new tag + add versionadded
jeremiedbb Jul 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/whats_new/v1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,16 @@ Changelog
- |Fix| Fixed error message of :class:`metrics.coverage_error` for 1D array input.
:pr:`23548` by :user:`Hao Chun Chang <haochunchang>`.

:mod:`sklearn.multioutput`
..........................

- |Feature| Added a boolean `verbose` flag to :class:`multioutput.ClassifierChain`
and :class:`multioutput.RegressorChain` to print progress as each model in the chain is fitted.
:pr:`23977` by :user:`Eric Fiegel <efiegel>`,
:user:`Chiara Marmo <cmarmo>`,
:user:`Lucy Liu <lucyleeow>`, and
:user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.naive_bayes`
..........................

Expand Down
36 changes: 32 additions & 4 deletions sklearn/multioutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,14 @@
from .base import BaseEstimator, clone, MetaEstimatorMixin
from .base import RegressorMixin, ClassifierMixin, is_classifier
from .model_selection import cross_val_predict
from .utils import check_random_state, _print_elapsed_time
from .utils.metaestimators import available_if
from .utils import check_random_state
from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params
from .utils.multiclass import check_classification_targets
from .utils.validation import (
check_is_fitted,
has_fit_parameter,
_check_fit_params,
)
from .utils.fixes import delayed
from .utils._param_validation import HasMethods

Expand Down Expand Up @@ -538,11 +542,19 @@ def _check(self):


class _BaseChain(BaseEstimator, metaclass=ABCMeta):
def __init__(self, base_estimator, *, order=None, cv=None, random_state=None):
def __init__(
    self,
    base_estimator,
    *,
    order=None,
    cv=None,
    random_state=None,
    verbose=False,
):
    # Constructor arguments are stored unchanged under attributes of the
    # same name; no validation or other work happens here.
    self.base_estimator = base_estimator
    self.order = order
    self.cv = cv
    self.random_state = random_state
    self.verbose = verbose

def _log_message(self, *, estimator_idx, n_estimators, processing_msg):
    """Build the progress message for one estimator of the chain.

    Parameters
    ----------
    estimator_idx : object
        Value shown before "of" in the message prefix.

    n_estimators : object
        Value shown after "of" in the message prefix.

    processing_msg : object
        Text appended after the "(i of n)" prefix.

    Returns
    -------
    message : str or None
        The formatted message, or None when `self.verbose` is falsy.
    """
    if self.verbose:
        return f"({estimator_idx} of {n_estimators}) {processing_msg}"
    return None

@abstractmethod
def fit(self, X, Y, **fit_params):
Expand Down Expand Up @@ -602,8 +614,14 @@ def fit(self, X, Y, **fit_params):
del Y_pred_chain

for chain_idx, estimator in enumerate(self.estimators_):
message = self._log_message(
estimator_idx=chain_idx + 1,
n_estimators=len(self.estimators_),
processing_msg=f"Processing order {self.order_[chain_idx]}",
)
y = Y[:, self.order_[chain_idx]]
estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params)
with _print_elapsed_time("Chain", message):
estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params)
if self.cv is not None and chain_idx < len(self.estimators_) - 1:
col_idx = X.shape[1] + chain_idx
cv_result = cross_val_predict(
Expand Down Expand Up @@ -702,6 +720,11 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):
Pass an int for reproducible output across multiple function calls.
See :term:`Glossary <random_state>`.

verbose : bool, default=False
If True, a progress message is printed as each model in the chain finishes fitting.

.. versionadded:: 1.2

Attributes
----------
classes_ : list
Expand Down Expand Up @@ -903,6 +926,11 @@ class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain):
Pass an int for reproducible output across multiple function calls.
See :term:`Glossary <random_state>`.

verbose : bool, default=False
If True, a progress message is printed as each model in the chain finishes fitting.

.. versionadded:: 1.2

Attributes
----------
estimators_ : list
Expand Down
48 changes: 47 additions & 1 deletion sklearn/tests/test_multioutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import scipy.sparse as sp
from joblib import cpu_count
import re

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
Expand All @@ -10,6 +11,8 @@
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.datasets import load_linnerud
from sklearn.datasets import make_multilabel_classification
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import Lasso
Expand All @@ -18,15 +21,17 @@
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import jaccard_score, mean_squared_error
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import ClassifierChain, RegressorChain
from sklearn.multioutput import MultiOutputClassifier
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import ClassifierMixin
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.dummy import DummyRegressor, DummyClassifier
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
Expand Down Expand Up @@ -702,6 +707,47 @@ def test_classifier_chain_tuple_invalid_order():
chain.fit(X, y)


def test_classifier_chain_verbose(capsys):
    """Check the progress lines printed by ``ClassifierChain(verbose=True)``."""
    # One "[Chain]" line is expected per estimator, reported in chain order.
    pattern = (
        r"\[Chain\].*\(1 of 3\) Processing order 0, total=.*\n"
        r"\[Chain\].*\(2 of 3\) Processing order 1, total=.*\n"
        r"\[Chain\].*\(3 of 3\) Processing order 2, total=.*\n$"
    )

    features, labels = make_multilabel_classification(
        n_samples=100, n_features=5, n_classes=3, n_labels=3, random_state=0
    )
    # Only the training halves of the split are fitted on.
    features_train, _, labels_train, _ = train_test_split(
        features, labels, random_state=0
    )

    chain = ClassifierChain(
        DecisionTreeClassifier(), order=[0, 1, 2], random_state=0, verbose=True
    )
    chain.fit(features_train, labels_train)

    captured = capsys.readouterr()
    assert re.match(pattern, captured.out)


def test_regressor_chain_verbose(capsys):
    """Check the progress lines printed by ``RegressorChain(verbose=True)``."""
    # The chain order is [1, 0, 2], so the reported targets must follow it.
    pattern = (
        r"\[Chain\].*\(1 of 3\) Processing order 1, total=.*\n"
        r"\[Chain\].*\(2 of 3\) Processing order 0, total=.*\n"
        r"\[Chain\].*\(3 of 3\) Processing order 2, total=.*\n$"
    )

    features, targets = make_regression(n_samples=125, n_targets=3, random_state=0)
    # Only the training halves of the split are fitted on.
    features_train, _, targets_train, _ = train_test_split(
        features, targets, random_state=0
    )

    chain = RegressorChain(
        LinearRegression(), order=[1, 0, 2], random_state=0, verbose=True
    )
    chain.fit(features_train, targets_train)

    captured = capsys.readouterr()
    assert re.match(pattern, captured.out)


def test_multioutputregressor_ducktypes_fitted_estimator():
"""Test that MultiOutputRegressor checks the fitted estimator for
predict. Non-regression test for #16549."""
Expand Down