diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 1e5ea29740c00..1e8327330e107 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -23,9 +23,11 @@
 from ..utils import indexable, check_random_state, safe_indexing
 from ..utils.validation import _is_arraylike, _num_samples
 from ..utils.metaestimators import _safe_split
+from ..utils.metaestimators import if_delegate_has_method
 from ..externals.joblib import Parallel, delayed, logger
 from ..externals.six.moves import zip
 from ..metrics.scorer import check_scoring, _check_multimetric_scoring
+from ..metrics.scorer import _passthrough_scorer
 from ..exceptions import FitFailedWarning
 from ._split import check_cv
 from ..preprocessing import LabelEncoder
@@ -521,10 +523,50 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
     return score
 
 
+class _MemoizedPredictEstimator:
+    def __init__(self, estimator):
+        self.estimator = estimator
+
+    def fit(self, X, y):
+        self.estimator.fit(X, y)
+
+    @if_delegate_has_method(delegate='estimator')
+    def predict(self, X):
+        if not hasattr(self, '_predictions'):
+            self._predictions = self.estimator.predict(X)
+        return self._predictions
+
+    @if_delegate_has_method(delegate='estimator')
+    def decision_function(self, X):
+        if not hasattr(self, '_decisions'):
+            self._decisions = self.estimator.decision_function(X)
+        return self._decisions
+
+    @if_delegate_has_method(delegate='estimator')
+    def predict_proba(self, X):
+        if not hasattr(self, '_probs'):
+            self._probs = self.estimator.predict_proba(X)
+        return self._probs
+
+    @if_delegate_has_method(delegate='estimator')
+    def predict_log_proba(self, X):
+        if not hasattr(self, '_log_probs'):
+            self._log_probs = self.estimator.predict_log_proba(X)
+        return self._log_probs
+
+    @if_delegate_has_method(delegate='estimator')
+    def score(self, *args, **kwargs):
+        return self.estimator.score(*args, **kwargs)
+
+
 def _multimetric_score(estimator, X_test, y_test, scorers):
     """Return a dict of score for multimetric scoring"""
     scores = {}
 
+    # Try wrapping the estimator in _MemoizedPredictEstimator
+    # If the estimator has a score, wrapping it will not do any harm
+    estimator = _MemoizedPredictEstimator(estimator)
+
     for name, scorer in scorers.items():
         if y_test is None:
             score = scorer(estimator, X_test)
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index dedb77026c544..a51be9c3cb6dd 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -38,6 +38,7 @@
 from sklearn.model_selection import learning_curve
 from sklearn.model_selection import validation_curve
 from sklearn.model_selection._validation import _check_is_permutation
+from sklearn.model_selection._validation import _multimetric_score
 
 from sklearn.datasets import make_regression
 from sklearn.datasets import load_boston
@@ -50,6 +51,7 @@
 from sklearn.metrics import precision_score
 from sklearn.metrics import r2_score
 from sklearn.metrics.scorer import check_scoring
+from sklearn.metrics.scorer import _check_multimetric_scoring
 
 from sklearn.linear_model import Ridge, LogisticRegression
 from sklearn.linear_model import PassiveAggressiveClassifier
@@ -219,6 +221,17 @@ def get_params(self, deep=False):
         return {'a': self.a, 'allow_nd': self.allow_nd}
 
 
+class CountCallPredictedEstimator:
+    def __init__(self):
+        self._n_predict_calls = 0
+        self._rng = np.random.RandomState(0)
+    def fit(self, X, y):
+        return self
+    def predict(self, X):
+        self._n_predict_calls += 1
+        return self._rng.randint(0, 2, size=X.shape[0])
+
+
 # XXX: use 2D array, since 1D X is being detected as a single sample in
 # check_consistent_length
 X = np.ones((10, 2))
@@ -1299,3 +1312,14 @@ def test_permutation_test_score_pandas():
     check_series = lambda x: isinstance(x, TargetType)
     clf = CheckingClassifier(check_X=check_df, check_y=check_series)
     permutation_test_score(clf, X_df, y_ser)
+
+
+def test_multiscore_memoizing():
+    # Check if memoizing works as expected in _multimetric_score
+    X, y = make_classification(n_samples=1000, random_state=0)
+    estimator = CountCallPredictedEstimator()
+    scorers, _ = _check_multimetric_scoring(estimator,
+                                            ['neg_mean_squared_error',
+                                             'neg_median_absolute_error'])
+    scores = _multimetric_score(estimator, X, y, scorers=scorers)
+    assert estimator._n_predict_calls == 1