From a2c94d4c546d17910943f864aef395dd175f2870 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 1 Jul 2017 22:09:54 +0800 Subject: [PATCH 1/3] add scorer based on explained_variance_score --- doc/modules/model_evaluation.rst | 3 ++- sklearn/metrics/scorer.py | 7 +++++-- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index d010256e94345..480d74e8e46f9 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -81,6 +81,7 @@ Scoring Function 'v_measure_score' :func:`metrics.v_measure_score` **Regression** +'explained_variance_score' :func:`metrics.explained_variance_score` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` 'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` @@ -101,7 +102,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + ValueError: 'wrong_choice' is not a valid scoring value. 
Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'explained_variance_score', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. note:: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 3a163d967c542..d056608d5570a 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -26,7 +26,8 @@ from . import (r2_score, median_absolute_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, accuracy_score, f1_score, roc_auc_score, average_precision_score, - precision_score, recall_score, log_loss) + precision_score, recall_score, log_loss, + explained_variance_score) from .cluster import adjusted_rand_score from .cluster import homogeneity_score @@ -349,6 +350,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, # Standard regression scores +explained_variance_scorer = make_scorer(explained_variance_score) r2_scorer = make_scorer(r2_score) neg_mean_squared_error_scorer = make_scorer(mean_squared_error, greater_is_better=False) @@ -411,7 +413,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, fowlkes_mallows_scorer = make_scorer(fowlkes_mallows_score) -SCORERS = dict(r2=r2_scorer, +SCORERS = dict(explained_variance_score=explained_variance_scorer, + r2=r2_scorer, neg_median_absolute_error=neg_median_absolute_error_scorer, neg_mean_absolute_error=neg_mean_absolute_error_scorer, neg_mean_squared_error=neg_mean_squared_error_scorer, diff --git 
a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 461bdadf3d6e5..447e00b2fc5c9 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -25,7 +25,6 @@ from sklearn.svm import LinearSVC from sklearn.pipeline import make_pipeline from sklearn.cluster import KMeans -from sklearn.dummy import DummyRegressor from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.datasets import make_blobs @@ -38,8 +37,9 @@ from sklearn.externals import joblib -REGRESSION_SCORERS = ['r2', 'neg_mean_absolute_error', - 'neg_mean_squared_error', 'neg_mean_squared_log_error', +REGRESSION_SCORERS = ['explained_variance_score', 'r2', + 'neg_mean_absolute_error', 'neg_mean_squared_error', + 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'mean_absolute_error', 'mean_squared_error', 'median_absolute_error'] @@ -64,7 +64,7 @@ def _make_estimators(X_train, y_train, y_ml_train): # Make estimators that make sense to test various scoring methods - sensible_regr = DummyRegressor(strategy='median') + sensible_regr = DecisionTreeRegressor(random_state=0) sensible_regr.fit(X_train, y_train) sensible_clf = DecisionTreeClassifier(random_state=0) sensible_clf.fit(X_train, y_train) From eaa1c97fba80123c7b8cd236f4b47d929b4bb81f Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sun, 2 Jul 2017 09:00:20 +0800 Subject: [PATCH 2/3] explained_variance_score -> explained_variance --- doc/modules/model_evaluation.rst | 4 ++-- sklearn/metrics/scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 480d74e8e46f9..9acb39f8d7da3 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -81,7 +81,7 @@ Scoring Function 'v_measure_score' 
:func:`metrics.v_measure_score` **Regression** -'explained_variance_score' :func:`metrics.explained_variance_score` +'explained_variance' :func:`metrics.explained_variance_score` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` 'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` @@ -102,7 +102,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'explained_variance_score', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. 
note:: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index d056608d5570a..9e74c0bcc8515 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -413,7 +413,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, fowlkes_mallows_scorer = make_scorer(fowlkes_mallows_score) -SCORERS = dict(explained_variance_score=explained_variance_scorer, +SCORERS = dict(explained_variance=explained_variance_scorer, r2=r2_scorer, neg_median_absolute_error=neg_median_absolute_error_scorer, neg_mean_absolute_error=neg_mean_absolute_error_scorer, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 447e00b2fc5c9..50bb27004551c 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -37,7 +37,7 @@ from sklearn.externals import joblib -REGRESSION_SCORERS = ['explained_variance_score', 'r2', +REGRESSION_SCORERS = ['explained_variance', 'r2', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'mean_absolute_error', From af05988521d3799a6f9d9e555d9ef27b82b5625e Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 8 Aug 2017 07:19:11 +0800 Subject: [PATCH 3/3] update what's new --- doc/whats_new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 075a675ab8937..a35f68e240949 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -139,6 +139,9 @@ Model selection and evaluation :class:`model_selection.RepeatedStratifiedKFold`. :issue:`8120` by `Neeraj Gangwar`_. +- Added a scorer based on :func:`metrics.explained_variance_score`. + :issue:`9259` by `Hanmin Qin <https://github.com/qinhanmin2014>`_. + Miscellaneous - Validation that input data contains no NaN or inf can now be suppressed