diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 35fa24ac9a846..b7b1c490777bd 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -900,7 +900,7 @@ Miscellaneous manifold.smacof manifold.spectral_embedding manifold.trustworthiness - + .. _metrics_ref: @@ -981,6 +981,7 @@ details. metrics.mean_squared_error metrics.mean_squared_log_error metrics.median_absolute_error + metrics.mean_absolute_percentage_error metrics.r2_score metrics.mean_poisson_deviance metrics.mean_gamma_deviance diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index cdbbc0814c8b1..bb8b59889a3f5 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -54,51 +54,52 @@ the model and the data, like :func:`metrics.mean_squared_error`, are available as neg_mean_squared_error which return the negated value of the metric. -============================== ============================================= ================================== -Scoring Function Comment -============================== ============================================= ================================== +==================================== ============================================== ================================== +Scoring Function Comment +==================================== ============================================== ================================== **Classification** -'accuracy' :func:`metrics.accuracy_score` -'balanced_accuracy' :func:`metrics.balanced_accuracy_score` -'average_precision' :func:`metrics.average_precision_score` -'neg_brier_score' :func:`metrics.brier_score_loss` -'f1' :func:`metrics.f1_score` for binary targets -'f1_micro' :func:`metrics.f1_score` micro-averaged -'f1_macro' :func:`metrics.f1_score` macro-averaged -'f1_weighted' :func:`metrics.f1_score` weighted average -'f1_samples' :func:`metrics.f1_score` by multilabel sample -'neg_log_loss' :func:`metrics.log_loss` requires ``predict_proba`` support -'precision' etc. :func:`metrics.precision_score` suffixes apply as with 'f1' -'recall' etc. :func:`metrics.recall_score` suffixes apply as with 'f1' -'jaccard' etc. :func:`metrics.jaccard_score` suffixes apply as with 'f1' -'roc_auc' :func:`metrics.roc_auc_score` -'roc_auc_ovr' :func:`metrics.roc_auc_score` -'roc_auc_ovo' :func:`metrics.roc_auc_score` -'roc_auc_ovr_weighted' :func:`metrics.roc_auc_score` -'roc_auc_ovo_weighted' :func:`metrics.roc_auc_score` +'accuracy' :func:`metrics.accuracy_score` +'balanced_accuracy' :func:`metrics.balanced_accuracy_score` +'average_precision' :func:`metrics.average_precision_score` +'neg_brier_score' :func:`metrics.brier_score_loss` +'f1' :func:`metrics.f1_score` for binary targets +'f1_micro' :func:`metrics.f1_score` micro-averaged +'f1_macro' :func:`metrics.f1_score` macro-averaged +'f1_weighted' :func:`metrics.f1_score` weighted average +'f1_samples' :func:`metrics.f1_score` by multilabel sample +'neg_log_loss' :func:`metrics.log_loss` requires ``predict_proba`` support +'precision' etc. :func:`metrics.precision_score` suffixes apply as with 'f1' +'recall' etc. :func:`metrics.recall_score` suffixes apply as with 'f1' +'jaccard' etc. 
:func:`metrics.jaccard_score` suffixes apply as with 'f1' +'roc_auc' :func:`metrics.roc_auc_score` +'roc_auc_ovr' :func:`metrics.roc_auc_score` +'roc_auc_ovo' :func:`metrics.roc_auc_score` +'roc_auc_ovr_weighted' :func:`metrics.roc_auc_score` +'roc_auc_ovo_weighted' :func:`metrics.roc_auc_score` **Clustering** -'adjusted_mutual_info_score' :func:`metrics.adjusted_mutual_info_score` -'adjusted_rand_score' :func:`metrics.adjusted_rand_score` -'completeness_score' :func:`metrics.completeness_score` -'fowlkes_mallows_score' :func:`metrics.fowlkes_mallows_score` -'homogeneity_score' :func:`metrics.homogeneity_score` -'mutual_info_score' :func:`metrics.mutual_info_score` -'normalized_mutual_info_score' :func:`metrics.normalized_mutual_info_score` -'v_measure_score' :func:`metrics.v_measure_score` +'adjusted_mutual_info_score' :func:`metrics.adjusted_mutual_info_score` +'adjusted_rand_score' :func:`metrics.adjusted_rand_score` +'completeness_score' :func:`metrics.completeness_score` +'fowlkes_mallows_score' :func:`metrics.fowlkes_mallows_score` +'homogeneity_score' :func:`metrics.homogeneity_score` +'mutual_info_score' :func:`metrics.mutual_info_score` +'normalized_mutual_info_score' :func:`metrics.normalized_mutual_info_score` +'v_measure_score' :func:`metrics.v_measure_score` **Regression** -'explained_variance' :func:`metrics.explained_variance_score` -'max_error' :func:`metrics.max_error` -'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` -'neg_mean_squared_error' :func:`metrics.mean_squared_error` -'neg_root_mean_squared_error' :func:`metrics.mean_squared_error` -'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` -'neg_median_absolute_error' :func:`metrics.median_absolute_error` -'r2' :func:`metrics.r2_score` -'neg_mean_poisson_deviance' :func:`metrics.mean_poisson_deviance` -'neg_mean_gamma_deviance' :func:`metrics.mean_gamma_deviance` -============================== ============================================= ================================== +'explained_variance' :func:`metrics.explained_variance_score` +'max_error' :func:`metrics.max_error` +'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` +'neg_mean_squared_error' :func:`metrics.mean_squared_error` +'neg_root_mean_squared_error' :func:`metrics.mean_squared_error` +'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` +'neg_median_absolute_error' :func:`metrics.median_absolute_error` +'r2' :func:`metrics.r2_score` +'neg_mean_poisson_deviance' :func:`metrics.mean_poisson_deviance` +'neg_mean_gamma_deviance' :func:`metrics.mean_gamma_deviance` +'neg_mean_absolute_percentage_error' :func:`metrics.mean_absolute_percentage_error` +==================================== ============================================== ================================== Usage examples: @@ -1963,6 +1964,42 @@ function:: >>> mean_squared_log_error(y_true, y_pred) 0.044... +.. _mean_absolute_percentage_error: + +Mean absolute percentage error +------------------------------ +The :func:`mean_absolute_percentage_error` (MAPE), also known as mean absolute +percentage deviation (MAPD), is an evaluation metric for regression problems. +The idea of this metric is to be sensitive to relative errors. It is for example +not changed by a global scaling of the target variable. + +If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample +and :math:`y_i` is the corresponding true value, then the mean absolute percentage +error (MAPE) estimated over :math:`n_{\text{samples}}` is defined as + +.. 
math::
+
+  \text{MAPE}(y, \hat{y}) = \frac{1}{n_{\text{samples}}} \sum_{i=0}^{n_{\text{samples}}-1} \frac{\left| y_i - \hat{y}_i \right|}{\max(\epsilon, \left| y_i \right|)}
+
+where :math:`\epsilon` is an arbitrarily small but strictly positive number,
+used to avoid undefined results when :math:`y` is zero.
+
+The :func:`mean_absolute_percentage_error` function supports multioutput.
+
+Here is a small example of usage of the :func:`mean_absolute_percentage_error`
+function::
+
+  >>> from sklearn.metrics import mean_absolute_percentage_error
+  >>> y_true = [1, 10, 1e6]
+  >>> y_pred = [0.9, 15, 1.2e6]
+  >>> mean_absolute_percentage_error(y_true, y_pred)
+  0.2666...
+
+In the above example, :func:`mean_absolute_error` would have been dominated by
+the error on the largest-magnitude target and would have mostly ignored the
+smaller values. MAPE does not have this problem because it measures each error
+relative to the corresponding true value.
+
 .. _median_absolute_error:

 Median absolute error
diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst
index cc3957eca1592..ca0f8ede93afa 100644
--- a/doc/whats_new/_contributors.rst
+++ b/doc/whats_new/_contributors.rst
@@ -176,4 +176,4 @@

 .. _Nicolas Hug: https://github.com/NicolasHug

-.. _Guillaume Lemaitre: https://github.com/glemaitre
+.. _Guillaume Lemaitre: https://github.com/glemaitre
\ No newline at end of file
diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index c41f761de1018..e637e367d401e 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -150,6 +150,12 @@ Changelog
 :mod:`sklearn.metrics`
 ......................

+- |Feature| Added the :func:`metrics.mean_absolute_percentage_error` metric
+  and the associated scorer for regression problems. :issue:`10708` is fixed
+  by :pr:`15007` from :user:`Ashutosh Hathidara `. The scorer and some
+  practical test cases were taken from :pr:`10711` by
+  :user:`Mohamed Ali Jamaoui `.
+
 - |Fix| Fixed a bug in :func:`metrics.mean_squared_error` where the average of
   multiple RMSE values was incorrectly calculated as the root of the average of
   multiple MSE values.
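A minimal doctest-style illustration (not part of the patch) of the scale-invariance
property described in the user guide section above: it only uses the public functions
referenced in this diff, and the outputs are abbreviated with ``...`` as in the
documentation::

  >>> from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
  >>> y_true = [1, 10, 1e6]
  >>> y_pred = [0.9, 15, 1.2e6]
  >>> mean_absolute_percentage_error(y_true, y_pred)
  0.2666...
  >>> # Scaling both arrays by the same positive factor leaves MAPE unchanged...
  >>> mean_absolute_percentage_error([100 * v for v in y_true],
  ...                                [100 * v for v in y_pred])
  0.2666...
  >>> # ...while the mean absolute error scales with the targets.
  >>> mean_absolute_error(y_true, y_pred)
  66668.36...
  >>> mean_absolute_error([100 * v for v in y_true], [100 * v for v in y_pred])
  6666836.66...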
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 8bcb047ec8161..be28005631963 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -64,6 +64,7 @@
 from ._regression import mean_squared_error
 from ._regression import mean_squared_log_error
 from ._regression import median_absolute_error
+from ._regression import mean_absolute_percentage_error
 from ._regression import r2_score
 from ._regression import mean_tweedie_deviance
 from ._regression import mean_poisson_deviance
@@ -128,6 +129,7 @@
     'mean_gamma_deviance',
     'mean_tweedie_deviance',
     'median_absolute_error',
+    'mean_absolute_percentage_error',
     'multilabel_confusion_matrix',
     'mutual_info_score',
     'ndcg_score',
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 9064c018a24a9..ff3907d3c27ae 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -20,6 +20,7 @@
 #          Michael Eickenberg
 #          Konstantin Shmelkov
 #          Christian Lorentzen
+#          Ashutosh Hathidara
 # License: BSD 3 clause

 import numpy as np
@@ -41,6 +42,7 @@
     "mean_squared_error",
     "mean_squared_log_error",
     "median_absolute_error",
+    "mean_absolute_percentage_error",
     "r2_score",
     "explained_variance_score",
     "mean_tweedie_deviance",
@@ -192,6 +194,81 @@ def mean_absolute_error(y_true, y_pred, *,
     return np.average(output_errors, weights=multioutput)


+def mean_absolute_percentage_error(y_true, y_pred,
+                                   sample_weight=None,
+                                   multioutput='uniform_average'):
+    """Mean absolute percentage error regression loss
+
+    Note that we do not represent the output as a percentage in the range
+    [0, 100]. Instead, we represent it in the range [0, 1/eps]. Read more in
+    the :ref:`User Guide <mean_absolute_percentage_error>`.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values.
+
+    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values.
+
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+
+    multioutput : {'raw_values', 'uniform_average'} or array-like
+        Defines aggregating of multiple output values.
+        Array-like value defines weights used to average errors.
+        If input is list then the shape must be (n_outputs,).
+
+        'raw_values' :
+            Returns a full set of errors in case of multioutput input.
+
+        'uniform_average' :
+            Errors of all outputs are averaged with uniform weight.
+
+    Returns
+    -------
+    loss : float or ndarray of floats in the range [0, 1/eps]
+        If multioutput is 'raw_values', then mean absolute percentage error
+        is returned for each output separately.
+        If multioutput is 'uniform_average' or an ndarray of weights, then the
+        weighted average of all output errors is returned.
+
+        MAPE output is non-negative floating point. The best value is 0.0.
+        But note that bad predictions can lead to arbitrarily large MAPE
+        values, especially if some y_true values are very close to zero.
+        Note that we return a large value instead of `inf` when y_true is zero.
+
+    Examples
+    --------
+    >>> from sklearn.metrics import mean_absolute_percentage_error
+    >>> y_true = [3, -0.5, 2, 7]
+    >>> y_pred = [2.5, 0.0, 2, 8]
+    >>> mean_absolute_percentage_error(y_true, y_pred)
+    0.3273...
+    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
+    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
+    >>> mean_absolute_percentage_error(y_true, y_pred)
+    0.5515...
+    >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
+    0.6198...
+ """ + y_type, y_true, y_pred, multioutput = _check_reg_targets( + y_true, y_pred, multioutput) + check_consistent_length(y_true, y_pred, sample_weight) + epsilon = np.finfo(np.float64).eps + mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon) + output_errors = np.average(mape, + weights=sample_weight, axis=0) + if isinstance(multioutput, str): + if multioutput == 'raw_values': + return output_errors + elif multioutput == 'uniform_average': + # pass None as weights to np.average: uniform mean + multioutput = None + + return np.average(output_errors, weights=multioutput) + + @_deprecate_positional_args def mean_squared_error(y_true, y_pred, *, sample_weight=None, diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 1f05462928916..6098eae2d68a0 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -30,7 +30,7 @@ f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, log_loss, balanced_accuracy_score, explained_variance_score, - brier_score_loss, jaccard_score) + brier_score_loss, jaccard_score, mean_absolute_percentage_error) from .cluster import adjusted_rand_score from .cluster import homogeneity_score @@ -614,6 +614,9 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False, greater_is_better=False) neg_mean_absolute_error_scorer = make_scorer(mean_absolute_error, greater_is_better=False) +neg_mean_absolute_percentage_error_scorer = make_scorer( + mean_absolute_percentage_error, greater_is_better=False +) neg_median_absolute_error_scorer = make_scorer(median_absolute_error, greater_is_better=False) neg_root_mean_squared_error_scorer = make_scorer(mean_squared_error, @@ -674,6 +677,7 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False, max_error=max_error_scorer, neg_median_absolute_error=neg_median_absolute_error_scorer, neg_mean_absolute_error=neg_mean_absolute_error_scorer, + neg_mean_absolute_percentage_error=neg_mean_absolute_percentage_error_scorer, # noqa neg_mean_squared_error=neg_mean_squared_error_scorer, neg_mean_squared_log_error=neg_mean_squared_log_error_scorer, neg_root_mean_squared_error=neg_root_mean_squared_error_scorer, diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 7301d21a35f39..3f2ba83b474c7 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -41,6 +41,7 @@ from sklearn.metrics import max_error from sklearn.metrics import matthews_corrcoef from sklearn.metrics import mean_absolute_error +from sklearn.metrics import mean_absolute_percentage_error from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_tweedie_deviance from sklearn.metrics import mean_poisson_deviance @@ -98,6 +99,7 @@ "mean_absolute_error": mean_absolute_error, "mean_squared_error": mean_squared_error, "median_absolute_error": median_absolute_error, + "mean_absolute_percentage_error": mean_absolute_percentage_error, "explained_variance_score": explained_variance_score, "r2_score": partial(r2_score, multioutput='variance_weighted'), "mean_normal_deviance": partial(mean_tweedie_deviance, power=0), @@ -425,7 +427,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): # Regression metrics with "multioutput-continuous" format support MULTIOUTPUT_METRICS = { "mean_absolute_error", "median_absolute_error", "mean_squared_error", - "r2_score", "explained_variance_score" + "r2_score", "explained_variance_score", "mean_absolute_percentage_error" } # 
Symmetric with respect to their input arguments y_true and y_pred
@@ -472,7 +474,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
     "macro_f0.5_score", "macro_f2_score", "macro_precision_score",
     "macro_recall_score", "log_loss", "hinge_loss",
     "mean_gamma_deviance", "mean_poisson_deviance",
-    "mean_compound_poisson_deviance"
+    "mean_compound_poisson_deviance", "mean_absolute_percentage_error"
 }
@@ -1371,7 +1373,15 @@ def test_thresholded_multilabel_multioutput_permutations_invariance(name):
         y_true_perm = y_true[:, perm]

         current_score = metric(y_true_perm, y_score_perm)
-        assert_almost_equal(score, current_score)
+        if metric == mean_absolute_percentage_error:
+            # We do not compare the values for MAPE here because, whenever a
+            # y_true entry is exactly zero, the corresponding MAPE term is not
+            # meaningful; in that case we only expect a very large but finite
+            # value.
+            assert np.isfinite(current_score)
+            assert current_score > 1e6
+        else:
+            assert_almost_equal(score, current_score)


 @pytest.mark.parametrize(
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index c5e9743539612..5b8406cf7a61f 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -13,6 +13,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.metrics import mean_squared_log_error
 from sklearn.metrics import median_absolute_error
+from sklearn.metrics import mean_absolute_percentage_error
 from sklearn.metrics import max_error
 from sklearn.metrics import r2_score
 from sklearn.metrics import mean_tweedie_deviance
@@ -32,6 +33,9 @@ def test_regression_metrics(n_samples=50):
                            np.log(1 + y_pred)))
     assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
     assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
+    mape = mean_absolute_percentage_error(y_true, y_pred)
+    assert np.isfinite(mape)
+    assert mape > 1e6
     assert_almost_equal(max_error(y_true, y_pred), 1.)
     assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
     assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
@@ -86,6 +90,10 @@ def test_multioutput_regression():
     error = mean_absolute_error(y_true, y_pred)
     assert_almost_equal(error, (1. + 2. / 3) / 4.)

+    error = np.around(mean_absolute_percentage_error(y_true, y_pred),
+                      decimals=2)
+    assert np.isfinite(error)
+    assert error > 1e6
     error = median_absolute_error(y_true, y_pred)
     assert_almost_equal(error, (1. + 1.) / 4.)
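The tests above only assert that MAPE is a very large finite number when some y_true
entries are exactly zero. A standalone sketch (not part of the patch; ``mape_sketch``
is a hypothetical helper name, not a scikit-learn API) mirroring the epsilon clipping
used in ``_regression.py`` shows where that large value comes from:

    import numpy as np

    def mape_sketch(y_true, y_pred):
        # Mirror of the patched implementation for 1d targets: clip |y_true|
        # from below by the float64 machine epsilon (~2.22e-16) so the
        # division never hits zero.
        y_true = np.asarray(y_true, dtype=np.float64)
        y_pred = np.asarray(y_pred, dtype=np.float64)
        eps = np.finfo(np.float64).eps
        return np.mean(np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), eps))

    print(mape_sketch([3, -0.5, 2, 7], [2.5, 0.0, 2, 8]))  # ~0.327, as in the docstring example
    print(mape_sketch([0.0, 1.0], [1.0, 1.0]))             # ~2.25e15: huge but finite, not inf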
@@ -100,6 +108,7 @@ def test_regression_metrics_at_limits(): assert_almost_equal(mean_squared_error([0.], [0.], squared=False), 0.00, 2) assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2) assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2) + assert_almost_equal(mean_absolute_percentage_error([0.], [0.]), 0.00, 2) assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2) assert_almost_equal(max_error([0.], [0.]), 0.00, 2) assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2) @@ -198,11 +207,14 @@ def test_regression_multioutput_array(): mse = mean_squared_error(y_true, y_pred, multioutput='raw_values') mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values') + mape = mean_absolute_percentage_error(y_true, y_pred, + multioutput='raw_values') r = r2_score(y_true, y_pred, multioutput='raw_values') evs = explained_variance_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2) assert_array_almost_equal(mae, [0.25, 0.625], decimal=2) + assert_array_almost_equal(mape, [0.0778, 0.2262], decimal=2) assert_array_almost_equal(r, [0.95, 0.93], decimal=2) assert_array_almost_equal(evs, [0.95, 0.93], decimal=2) @@ -254,12 +266,15 @@ def test_regression_custom_weights(): rmsew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6], squared=False) maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6]) + mapew = mean_absolute_percentage_error(y_true, y_pred, + multioutput=[0.4, 0.6]) rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6]) evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6]) assert_almost_equal(msew, 0.39, decimal=2) assert_almost_equal(rmsew, 0.59, decimal=2) assert_almost_equal(maew, 0.475, decimal=3) + assert_almost_equal(mapew, 0.1668, decimal=2) assert_almost_equal(rw, 0.94, decimal=2) assert_almost_equal(evsw, 0.94, decimal=2) @@ -308,3 +323,10 @@ def test_tweedie_deviance_continuity(): assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=2 + 1e-10), mean_tweedie_deviance(y_true, y_pred, power=2), atol=1e-6) + + +def test_mean_absolute_percentage_error(): + random_number_generator = np.random.RandomState(42) + y_true = random_number_generator.exponential(size=100) + y_pred = 1.2 * y_true + assert mean_absolute_percentage_error(y_true, y_pred) == pytest.approx(0.2) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index f49197a706e70..227fd8bbadee9 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -43,10 +43,12 @@ REGRESSION_SCORERS = ['explained_variance', 'r2', 'neg_mean_absolute_error', 'neg_mean_squared_error', + 'neg_mean_absolute_percentage_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'neg_root_mean_squared_error', 'mean_absolute_error', + 'mean_absolute_percentage_error', 'mean_squared_error', 'median_absolute_error', 'max_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance']
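As a usage sketch for the 'neg_mean_absolute_percentage_error' scoring string registered
in ``_scorer.py`` (not part of the patch; the dataset, target shift, and estimator below
are illustrative assumptions, not taken from the PR):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import cross_val_score

    X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)
    y = y + 500  # shift targets away from zero so MAPE stays well-behaved

    # The scorer is negated ("neg_") so that greater is better, like the other
    # error-based scorers in the table added to model_evaluation.rst.
    scores = cross_val_score(LinearRegression(), X, y,
                             scoring="neg_mean_absolute_percentage_error", cv=5)
    print(scores)          # negated MAPE per fold; values are <= 0
    print(-scores.mean())  # average MAPE across the folds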