7 changes: 7 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -236,6 +236,13 @@ Changelog
 :user:`Matt Hancock <notmatthancock>` and
 :pr:`5963` by :user:`Pablo Duboue <DrDub>`.
 
+- |Fix| Stacking and Voting estimators now ensure that their underlying
+  estimators are either all classifiers or all regressors.
+  :class:`ensemble.StackingClassifier`, :class:`ensemble.StackingRegressor`,
+  :class:`ensemble.VotingClassifier`, and :class:`ensemble.VotingRegressor`
+  now raise consistent error messages.
+  :pr:`15084` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 :mod:`sklearn.feature_extraction`
 .................................

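Illustration of the fix described above: mixing estimator types now raises the shared, consistent error message. A hypothetical doctest-style session, not part of this diff; it assumes a scikit-learn build containing this PR, and load_iris is only a convenient dataset choice:

>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import StackingClassifier
>>> X, y = load_iris(return_X_y=True)
>>> clf = StackingClassifier(estimators=[('lr', LinearRegression())])
>>> clf.fit(X, y)
Traceback (most recent call last):
...
ValueError: The estimator LinearRegression should be a classifier.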
86 changes: 3 additions & 83 deletions sklearn/ensemble/_stacking.py
@@ -15,6 +15,7 @@
 from ..base import MetaEstimatorMixin
 
 from .base import _parallel_fit_estimator
+from .base import _BaseHeterogeneousEnsemble
 
 from ..linear_model import LogisticRegression
 from ..linear_model import RidgeCV
@@ -32,80 +33,26 @@
 from ..utils.validation import column_or_1d
 
 
-class _BaseStacking(TransformerMixin, MetaEstimatorMixin, _BaseComposition,
+class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble,
                     metaclass=ABCMeta):
     """Base class for stacking method."""
-    _required_parameters = ['estimators']
 
     @abstractmethod
     def __init__(self, estimators, final_estimator=None, cv=None,
                  stack_method='auto', n_jobs=None, verbose=0):
-        self.estimators = estimators
+        super().__init__(estimators=estimators)
         self.final_estimator = final_estimator
         self.cv = cv
         self.stack_method = stack_method
         self.n_jobs = n_jobs
         self.verbose = verbose
 
-    @abstractmethod
-    def _validate_estimators(self):
-        if self.estimators is None or len(self.estimators) == 0:
-            raise ValueError(
-                "Invalid 'estimators' attribute, 'estimators' should be a list"
-                " of (string, estimator) tuples."
-            )
-        names, estimators = zip(*self.estimators)
-        self._validate_names(names)
-        return names, estimators
-
     def _clone_final_estimator(self, default):
         if self.final_estimator is not None:
             self.final_estimator_ = clone(self.final_estimator)
         else:
             self.final_estimator_ = clone(default)
 
-    def set_params(self, **params):
-        """Set the parameters for the stacking estimator.
-
-        Valid parameter keys can be listed with `get_params()`.
-
-        Parameters
-        ----------
-        params : keyword arguments
-            Specific parameters using e.g.
-            `set_params(parameter_name=new_value)`. In addition to setting the
-            parameters of the stacking estimator, the individual estimators of
-            the stacking estimator can also be set, or can be removed by
-            setting them to 'drop'.
-
-        Examples
-        --------
-        In this example, the RandomForestClassifier is removed.
-
-        >>> from sklearn.linear_model import LogisticRegression
-        >>> from sklearn.ensemble import RandomForestClassifier
-        >>> from sklearn.ensemble import StackingClassifier
-        >>> clf1 = LogisticRegression()
-        >>> clf2 = RandomForestClassifier()
-        >>> eclf = StackingClassifier(estimators=[('lr', clf1), ('rf', clf2)])
-        >>> eclf.set_params(rf='drop')
-        StackingClassifier(estimators=[('lr', LogisticRegression()),
-                                       ('rf', 'drop')])
-        """
-        super()._set_params('estimators', **params)
-        return self
-
-    def get_params(self, deep=True):
-        """Get the parameters of the stacking estimator.
-
-        Parameters
-        ----------
-        deep : bool
-            Setting it to True gets the various estimators and the parameters
-            of the estimators as well.
-        """
-        return super()._get_params('estimators', deep=deep)
-
     def _concatenate_predictions(self, predictions):
         """Concatenate the predictions of each first layer learner.
 
@@ -172,13 +119,6 @@ def fit(self, X, y, sample_weight=None):
         names, all_estimators = self._validate_estimators()
         self._validate_final_estimator()
 
-        has_estimator = any(est != 'drop' for est in all_estimators)
-        if not has_estimator:
-            raise ValueError(
-                "All estimators are dropped. At least one is required "
-                "to be an estimator."
-            )
-
         stack_method = [self.stack_method] * len(all_estimators)
 
         # Fit the base estimators on the whole training data. Those
@@ -416,16 +356,6 @@ def __init__(self, estimators, final_estimator=None, cv=None,
             verbose=verbose
         )
 
-    def _validate_estimators(self):
-        names, estimators = super()._validate_estimators()
-        for est in estimators:
-            if est != 'drop' and not is_classifier(est):
-                raise ValueError(
-                    "The estimator {} should be a classifier."
-                    .format(est.__class__.__name__)
-                )
-        return names, estimators
-
     def _validate_final_estimator(self):
         self._clone_final_estimator(default=LogisticRegression())
         if not is_classifier(self.final_estimator_):
@@ -651,16 +581,6 @@ def __init__(self, estimators, final_estimator=None, cv=None, n_jobs=None,
             verbose=verbose
         )
 
-    def _validate_estimators(self):
-        names, estimators = super()._validate_estimators()
-        for est in estimators:
-            if est != 'drop' and not is_regressor(est):
-                raise ValueError(
-                    "The estimator {} should be a regressor."
-                    .format(est.__class__.__name__)
-                )
-        return names, estimators
-
     def _validate_final_estimator(self):
         self._clone_final_estimator(default=RidgeCV())
         if not is_regressor(self.final_estimator_):
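With the per-class `_validate_estimators` overrides deleted above, both stacking estimators now rely on the shared check in `_BaseHeterogeneousEnsemble` (see `sklearn/ensemble/base.py` below). A hypothetical doctest-style sketch of the regressor side, under the same assumptions as the previous example:

>>> from sklearn.datasets import load_diabetes
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.ensemble import StackingRegressor
>>> X, y = load_diabetes(return_X_y=True)
>>> reg = StackingRegressor(estimators=[('lr', LogisticRegression())])
>>> reg.fit(X, y)
Traceback (most recent call last):
...
ValueError: The estimator LogisticRegression should be a regressor.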
97 changes: 95 additions & 2 deletions sklearn/ensemble/base.py
@@ -5,16 +5,20 @@
 # Authors: Gilles Louppe
 # License: BSD 3 clause
 
-import numpy as np
+from abc import ABCMeta, abstractmethod
 import numbers
 
+import numpy as np
+
 from joblib import effective_n_jobs
 
 from ..base import clone
+from ..base import is_classifier, is_regressor
 from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
+from ..utils import Bunch
 from ..utils import check_random_state
-from abc import ABCMeta, abstractmethod
+from ..utils.metaestimators import _BaseComposition
 
 MAX_RAND_SEED = np.iinfo(np.int32).max
 
@@ -178,3 +182,92 @@ def _partition_estimators(n_estimators, n_jobs):
     starts = np.cumsum(n_estimators_per_job)
 
     return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()
+
+
+class _BaseHeterogeneousEnsemble(MetaEstimatorMixin, _BaseComposition,
+                                 metaclass=ABCMeta):
+    """Base class for heterogeneous ensembles of learners.
+
+    Parameters
+    ----------
+    estimators : list of (str, estimator) tuples
+        The estimators to use in the ensemble. Each element of the list is
+        defined as a tuple of string (i.e. name of the estimator) and an
+        estimator instance. An estimator can be set to `'drop'` using
+        `set_params`.
+
+    Attributes
+    ----------
+    estimators_ : list of estimators
+        The elements of the estimators parameter, having been fitted on the
+        training data. If an estimator has been set to `'drop'`, it will not
+        appear in `estimators_`.
+    """
+    _required_parameters = ['estimators']
+
+    @property
+    def named_estimators(self):
+        return Bunch(**dict(self.estimators))
+
+    @abstractmethod
+    def __init__(self, estimators):
+        self.estimators = estimators
+
+    def _validate_estimators(self):
+        if self.estimators is None or len(self.estimators) == 0:
+            raise ValueError(
+                "Invalid 'estimators' attribute, 'estimators' should be a list"
+                " of (string, estimator) tuples."
+            )
+        names, estimators = zip(*self.estimators)
+        # defined by _BaseComposition
+        self._validate_names(names)
+
+        has_estimator = any(est not in (None, 'drop') for est in estimators)
+        if not has_estimator:
+            raise ValueError(
+                "All estimators are dropped. At least one is required "
+                "to be an estimator."
+            )
+
+        is_estimator_type = (is_classifier if is_classifier(self)
+                             else is_regressor)
+
+        for est in estimators:
+            if est not in (None, 'drop') and not is_estimator_type(est):
+                raise ValueError(
+                    "The estimator {} should be a {}."
+                    .format(
+                        est.__class__.__name__, is_estimator_type.__name__[3:]
+                    )
+                )
+
+        return names, estimators
+
+    def set_params(self, **params):
+        """Set the parameters of an estimator from the ensemble.
+
+        Valid parameter keys can be listed with `get_params()`.
+
+        Parameters
+        ----------
+        **params : keyword arguments
+            Specific parameters using e.g.
+            `set_params(parameter_name=new_value)`. In addition to setting the
+            parameters of the ensemble estimator, the individual estimators of
+            the ensemble can also be set, or can be removed by setting them to
+            'drop'.
+        """
+        super()._set_params('estimators', **params)
+        return self
+
+    def get_params(self, deep=True):
+        """Get the parameters of an estimator from the ensemble.
+
+        Parameters
+        ----------
+        deep : bool
+            Setting it to True gets the various estimators and the parameters
+            of the estimators as well.
+        """
+        return super()._get_params('estimators', deep=deep)
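The `set_params`/`get_params` pair above delegates to `_BaseComposition`, so individual estimators can be replaced or dropped by name, and `named_estimators` exposes them as a `Bunch`. A hypothetical doctest-style sketch, mirroring the example removed from `_stacking.py`:

>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.ensemble import VotingClassifier
>>> clf1 = LogisticRegression()
>>> clf2 = RandomForestClassifier()
>>> eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)])
>>> eclf = eclf.set_params(rf='drop')  # drop the forest by name
>>> sorted(eclf.named_estimators)
['lr', 'rf']

Note that the name 'rf' still appears in `named_estimators`; it now maps to the string 'drop' rather than to an estimator instance.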
8 changes: 4 additions & 4 deletions sklearn/ensemble/tests/test_voting.py
@@ -37,9 +37,9 @@
 
 def test_estimator_init():
     eclf = VotingClassifier(estimators=[])
-    msg = ('Invalid `estimators` attribute, `estimators` should be'
-           ' a list of (string, estimator) tuples')
-    assert_raise_message(AttributeError, msg, eclf.fit, X, y)
+    msg = ("Invalid 'estimators' attribute, 'estimators' should be"
+           " a list of (string, estimator) tuples.")
+    assert_raise_message(ValueError, msg, eclf.fit, X, y)
 
     clf = LogisticRegression(random_state=1)
 
@@ -417,7 +417,7 @@ def test_set_estimator_none(drop):
     eclf2.set_params(voting='soft').fit(X, y)
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
     assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
-    msg = 'All estimators are None or "drop". At least one is required!'
+    msg = 'All estimators are dropped. At least one is required'
    assert_raise_message(
        ValueError, msg, eclf2.set_params(lr=drop, rf=drop, nb=drop).fit, X, y)

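The message expected by the updated test comes from the shared `_validate_estimators` check. A hypothetical doctest-style sketch reproducing it, under the same assumptions as the earlier examples:

>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.ensemble import VotingClassifier
>>> X, y = load_iris(return_X_y=True)
>>> eclf = VotingClassifier(estimators=[('lr', LogisticRegression())])
>>> eclf.set_params(lr='drop').fit(X, y)
Traceback (most recent call last):
...
ValueError: All estimators are dropped. At least one is required to be an estimator.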