From 84d3813c362f8582bcc2ca8cd4ab2323919c77ea Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Fri, 6 Sep 2019 04:20:59 +0100 Subject: [PATCH 01/10] first commit for issue 14876: zero_division parameter --- doc/whats_new/v0.21.rst | 7 + sklearn/metrics/classification.py | 130 +++++++++++--- sklearn/metrics/tests/test_classification.py | 180 +++++++++++++------ 3 files changed, 235 insertions(+), 82 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 28c1cc40542e2..e275cf3e6efc8 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -69,6 +69,13 @@ Support for Python 3.4 and below has been officially dropped. :mod:`sklearn.metrics` ...................... +- |Feature| Added a new parameter ``zero_division`` to multiple classification +metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, +:func:`fbeta_score`, :func:`precision_recall_fscore_support`, +:func:`classification_report`. This allows to set returned value for +ill-defined metrics. + :issue:`14876` by :user:`Marc Torrellas Socastro `. + - |Feature| Added the :func:`metrics.max_error` metric and a corresponding ``'max_error'`` scorer for single output regression. :issue:`12232` by :user:`Krishna Sangeeth `. diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index e8f7f85163259..1a006dfe01983 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -42,6 +42,17 @@ from ..exceptions import UndefinedMetricWarning +def _check_zero_division(zero_division): + if isinstance(zero_division, str): + if zero_division != "warn": + raise ValueError('zero_division must be one of ["warn", 0, 1]') + elif isinstance(zero_division, (int, float)): + if zero_division not in [0, 1]: + raise ValueError('zero_division must be one of ["warn", 0, 1]') + else: + raise TypeError('zero_division must be one of ["warn", 0, 1]') + + def _check_targets(y_true, y_pred): """Check that y_true and y_pred belong to the same classification task @@ -822,7 +833,7 @@ def zero_one_loss(y_true, y_pred, normalize=True, sample_weight=None): def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', - sample_weight=None): + sample_weight=None, zero_division="warn"): """Compute the F1 score, also known as balanced F-score or F-measure The F1 score can be interpreted as a weighted average of the precision and @@ -892,6 +903,12 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when both precision and recall are zero + (calculated using the same value for this parameter). + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- f1_score : float or array of float, shape = [n_unique_labels] @@ -921,15 +938,20 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.26... >>> f1_score(y_true, y_pred, average=None) array([0.8, 0. , 0. ]) + >>> y_true = [0, 0, 0, 0, 0, 0] + >>> y_pred = [0, 0, 0, 0, 0, 0] + >>> f1_score(y_true, y_pred, zero_division=1) + 1.0... 
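An illustrative aside (editor's sketch, not part of the original patch): with the same all-negative ``y_true``/``y_pred`` as above, the complementary setting pins the ill-defined score to zero instead.
    >>> f1_score(y_true, y_pred, zero_division=0)
    0.0...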
""" return fbeta_score(y_true, y_pred, 1, labels=labels, pos_label=pos_label, average=average, - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, - average='binary', sample_weight=None): + average='binary', sample_weight=None, zero_division="warn"): """Compute the F-beta score The F-beta score is the weighted harmonic mean of precision and recall, @@ -999,6 +1021,12 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when both precision and recall are zero + (calculated using the same value for this parameter). + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- fbeta_score : float (if average is not None) or array of float, shape =\ @@ -1043,15 +1071,17 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, pos_label=pos_label, average=average, warn_for=('f-score',), - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) return f -def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): +def _prf_divide(numerator, denominator, metric, + modifier, average, warn_for, zero_division): """Performs division and handles divide-by-zero. - On zero-division, sets the corresponding result elements to zero - and raises a warning. + On zero-division, sets the corresponding result elements equal to + ``zero_division`` and raises a warning. The metric, modifier and average arguments are used only for determining an appropriate warning. @@ -1062,7 +1092,12 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): return result # remove infs - result[mask] = 0.0 + result[mask] = float(zero_division == 1) + + # the user will be removing warnings if zero_division is set to something + # different than its default value + if zero_division != "warn": + return result # build appropriate warning # E.g. "Precision and F-score are ill-defined and being set to 0.0 in @@ -1095,7 +1130,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, pos_label=1, average=None, warn_for=('precision', 'recall', 'f-score'), - sample_weight=None): + sample_weight=None, + zero_division="warn"): """Compute precision, recall, F-measure and support for each class The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of @@ -1179,6 +1215,12 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 for precision, recall, and F-measure when + their computation implies a zero division. 
If + ``zero_division != "warn"``, warnings are suppressed + Returns ------- precision : float (if average is not None) or array of float, shape =\ @@ -1233,6 +1275,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, array([2, 2, 2])) """ + _check_zero_division(zero_division) average_options = (None, 'micro', 'macro', 'weighted', 'samples') if average not in average_options and average != 'binary': raise ValueError('average has to be one of ' + @@ -1249,7 +1292,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, if pos_label not in present_labels: if len(present_labels) < 2: # Only negative labels - return (0., 0., 0., 0) + zero_division = float(zero_division == 1) + return zero_division, zero_division, zero_division, None else: raise ValueError("pos_label=%r is not a valid label: %r" % (pos_label, present_labels)) @@ -1285,22 +1329,31 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # Oddly, we may get an "invalid" rather than a "divide" error # here. - precision = _prf_divide(tp_sum, pred_sum, - 'precision', 'predicted', average, warn_for) - recall = _prf_divide(tp_sum, true_sum, - 'recall', 'true', average, warn_for) + precision = _prf_divide(tp_sum, pred_sum, 'precision', + 'predicted', average, warn_for, zero_division) + recall = _prf_divide(tp_sum, true_sum, 'recall', + 'true', average, warn_for, zero_division) # Don't need to warn for F: either P or R warned, or tp == 0 where pos # and true are nonzero, in which case, F is well-defined and zero f_score = ((1 + beta2) * precision * recall / (beta2 * precision + recall)) f_score[tp_sum == 0] = 0.0 + f_score[(true_sum == 0) & (pred_sum == 0)] = float(zero_division == 1) # Average the results if average == 'weighted': weights = true_sum if weights.sum() == 0: - return 0, 0, 0, None + # precision is zero_division if there are no positive predictions + # recall is zero_division if there are no positive labels + # fscore is zero_division if all labels AND predictions are + # negative + return (float(zero_division == 1) if pred_sum.sum() == 0 else 0, + float(zero_division == 1), + float(zero_division == 1) if pred_sum.sum() == 0 else 0, + None) + elif average == 'samples': weights = sample_weight else: @@ -1317,7 +1370,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, def precision_score(y_true, y_pred, labels=None, pos_label=1, - average='binary', sample_weight=None): + average='binary', sample_weight=None, + zero_division="warn"): """Compute the precision The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of @@ -1383,6 +1437,11 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when there are no positive predictions. + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- precision : float (if average is not None) or array of float, shape =\ @@ -1409,19 +1468,24 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, 0.22... >>> precision_score(y_true, y_pred, average=None) # doctest: +ELLIPSIS array([0.66..., 0. , 0. ]) - + >>> y_pred = [0, 0, 0, 0, 0, 0] + >>> precision_score(y_true, y_pred) # doctest: +ELLIPSIS + 0.0... + >>> precision_score(y_true, y_pred, zero_division=1) # doctest: +ELLIPSIS + 1.0... 
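As an editing aside (this snippet is not part of the patch and only assumes the behaviour documented above): an explicit ``zero_division`` also silences ``UndefinedMetricWarning``, which can be checked by escalating that warning to an error::

    import warnings
    from sklearn.exceptions import UndefinedMetricWarning
    from sklearn.metrics import precision_score

    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 0, 0, 0, 0, 0]
    with warnings.catch_warnings():
        # escalate to prove the warning is not emitted when zero_division=0
        warnings.simplefilter("error", category=UndefinedMetricWarning)
        scores = precision_score(y_true, y_pred, average=None,
                                 zero_division=0)
    print(scores)  # approximately [0.33 0.   0.  ]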
""" p, _, _, _ = precision_recall_fscore_support(y_true, y_pred, labels=labels, pos_label=pos_label, average=average, warn_for=('precision',), - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) return p def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', - sample_weight=None): + sample_weight=None, zero_division="warn"): """Compute the recall The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of @@ -1486,6 +1550,11 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when there are no positive labels. + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- recall : float (if average is not None) or array of float, shape =\ @@ -1511,6 +1580,11 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.33... >>> recall_score(y_true, y_pred, average=None) array([1., 0., 0.]) + >>> y_true = [0, 0, 0, 0, 0, 0] + >>> recall_score(y_true, y_pred) + 0.0... + >>> recall_score(y_true, y_pred, zero_division=1) + 1.0... """ _, r, _, _ = precision_recall_fscore_support(y_true, y_pred, @@ -1518,7 +1592,8 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', pos_label=pos_label, average=average, warn_for=('recall',), - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) return r @@ -1600,7 +1675,8 @@ def balanced_accuracy_score(y_true, y_pred, sample_weight=None, def classification_report(y_true, y_pred, labels=None, target_names=None, - sample_weight=None, digits=2, output_dict=False): + sample_weight=None, digits=2, output_dict=False, + zero_division="warn"): """Build a text report showing the main classification metrics Read more in the :ref:`User Guide `. @@ -1630,6 +1706,12 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, output_dict : bool (default = False) If True, return output as dict + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 for precision, recall, and f1 when their + computation implies a zero division. If ``zero_division != "warn"``, + warnings are suppressed + Returns ------- report : string / dict @@ -1709,7 +1791,8 @@ class 2 1.00 0.67 0.80 3 p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, labels=labels, average=None, - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) rows = zip(target_names, p, r, f1, s) if y_type.startswith('multilabel'): @@ -1875,7 +1958,8 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or + (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 3152521f23b77..79d039b0802ff 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -528,8 +528,12 @@ def test_cohen_kappa(): y1 = np.array([0] * 46 + [1] * 44 + [2] * 10) y2 = np.array([0] * 50 + [1] * 40 + [2] * 10) assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4) - assert_almost_equal(cohen_kappa_score(y1, y2, weights="linear"), .9412, decimal=4) - assert_almost_equal(cohen_kappa_score(y1, y2, weights="quadratic"), .9541, decimal=4) + assert_almost_equal( + cohen_kappa_score(y1, y2, weights="linear"), .9412, decimal=4 + ) + assert_almost_equal(cohen_kappa_score( + y1, y2, weights="quadratic"), .9541, decimal=4 + ) @ignore_warnings @@ -1348,26 +1352,34 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): @pytest.mark.parametrize('beta', [1]) @pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) -def test_precision_recall_f1_no_labels(beta, average): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_no_labels(beta, average, zero_division): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=average, - beta=beta) - assert_almost_equal(p, 0) - assert_almost_equal(r, 0) - assert_almost_equal(f, 0) + func = precision_recall_fscore_support + my_assert = (assert_warns if zero_division == "warn" + else assert_no_warnings) + tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" + else [func]) + p, r, f, s = my_assert(*tmp, y_true, y_pred, average=average, + beta=beta, zero_division=zero_division) + tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" + else [fbeta_score]) + fbeta = my_assert(*tmp, y_true, y_pred, beta=beta, + average=average, zero_division=zero_division) + + zero_division = float(zero_division == 1) + assert_almost_equal(p, zero_division) + assert_almost_equal(r, zero_division) + assert_almost_equal(f, zero_division) assert_equal(s, None) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, - y_true, y_pred, - beta=beta, average=average) - assert_almost_equal(fbeta, 0) + assert_almost_equal(fbeta, float(zero_division == 1)) -def test_precision_recall_f1_no_labels_average_none(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_no_labels_average_none(zero_division): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) @@ -1381,100 +1393,143 @@ def test_precision_recall_f1_no_labels_average_none(): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=None, beta=beta) - assert_array_almost_equal(p, [0, 0, 0], 2) - assert_array_almost_equal(r, [0, 0, 0], 2) - assert_array_almost_equal(f, [0, 0, 0], 2) + func = precision_recall_fscore_support + my_assert = (assert_warns if zero_division == "warn" + else assert_no_warnings) + tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" + else [func]) + p, r, f, s = my_assert(*tmp, y_true, y_pred, average=None, + beta=beta, zero_division=zero_division) + tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" + else [fbeta_score]) + fbeta = my_assert(*tmp, y_true, y_pred, 
beta=beta, + average=None, zero_division=zero_division) + + zero_division = float(zero_division == 1) + assert_array_almost_equal( + p, [zero_division, zero_division, zero_division], 2 + ) + assert_array_almost_equal( + r, [zero_division, zero_division, zero_division], 2 + ) + assert_array_almost_equal( + f, [zero_division, zero_division, zero_division], 2 + ) assert_array_almost_equal(s, [0, 0, 0], 2) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, - y_true, y_pred, beta=beta, average=None) - assert_array_almost_equal(fbeta, [0, 0, 0], 2) + assert_array_almost_equal( + fbeta, [zero_division, zero_division, zero_division], 2 + ) -def test_prf_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_prf_warnings(zero_division): # average of per-label scores f, w = precision_recall_fscore_support, UndefinedMetricWarning - my_assert = assert_warns_message + my_assert = (assert_warns_message + if zero_division == "warn" else assert_no_warnings) for average in [None, 'weighted', 'macro']: + msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in labels with no predicted samples.') - my_assert(w, msg, f, [0, 1, 2], [1, 1, 2], average=average) + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [0, 1, 2], [1, 1, 2], average=average, + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.') - my_assert(w, msg, f, [1, 1, 2], [0, 1, 2], average=average) + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [1, 1, 2], [0, 1, 2], average=average, + zero_division=zero_division) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in samples with no predicted labels.') - my_assert(w, msg, f, np.array([[1, 0], [1, 0]]), - np.array([[1, 0], [0, 0]]), average='samples') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[1, 0], [1, 0]]), + np.array([[1, 0], [0, 0]]), average='samples', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.') - my_assert(w, msg, f, np.array([[1, 0], [0, 0]]), + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[1, 0], [0, 0]]), np.array([[1, 0], [1, 0]]), - average='samples') + average='samples', zero_division=zero_division) # single score: micro-average msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.') - my_assert(w, msg, f, np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), average='micro') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.') - my_assert(w, msg, f, np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), average='micro') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', + zero_division=zero_division) # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.') - my_assert(w, msg, f, [1, 1], [-1, -1], average='binary') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [1, 1], [-1, -1], average='binary', + zero_division=zero_division) msg = ('Recall 
and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.') - my_assert(w, msg, f, [-1, -1], [1, 1], average='binary') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [-1, -1], [1, 1], average='binary', + zero_division=zero_division) -def test_recall_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_recall_warnings(zero_division): assert_no_warnings(recall_score, np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), - average='micro') + average='micro', zero_division=zero_division) clean_warning_registry() with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') recall_score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), - average='micro') - assert_equal(str(record.pop().message), - 'Recall is ill-defined and ' - 'being set to 0.0 due to no true samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'Recall is ill-defined and ' + 'being set to 0.0 due to no true samples.') + else: + assert_equal(len(record), 0) -def test_precision_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_warnings(zero_division): clean_warning_registry() with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') precision_score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), - average='micro') - assert_equal(str(record.pop().message), - 'Precision is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'Precision is ill-defined and ' + 'being set to 0.0 due to no predicted samples.') + else: + assert_equal(len(record), 0) assert_no_warnings(precision_score, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), - average='micro') + average='micro', zero_division=zero_division) -def test_fscore_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_fscore_warnings(zero_division): clean_warning_registry() with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') @@ -1482,16 +1537,23 @@ def test_fscore_warnings(): for score in [f1_score, partial(fbeta_score, beta=2)]: score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), - average='micro') - assert_equal(str(record.pop().message), - 'F-score is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'F-score is ill-defined and ' + 'being set to 0.0 due to no predicted samples.') + else: + assert_equal(len(record), 0) + score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), - average='micro') - assert_equal(str(record.pop().message), - 'F-score is ill-defined and ' - 'being set to 0.0 due to no true samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'F-score is ill-defined and ' + 'being set to 0.0 due to no true samples.') + else: + assert_equal(len(record), 0) def test_prf_average_binary_data_non_binary(): From 16dcda17f3d2b6c04399517611f64491f5b106cd Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 13:14:59 +0100 Subject: [PATCH 02/10] - merge with master - Changed whats_new to 0.22 - F-score only warns if both prec and rec are ill-defined - new private method to 
simplify _prf_divide --- doc/developers/advanced_installation.rst | 12 +- doc/whats_new/v0.21.rst | 7 - doc/whats_new/v0.22.rst | 23 +++- .../plot_out_of_core_classification.py | 21 +-- sklearn/cluster/bicluster.py | 8 +- sklearn/compose/_target.py | 14 +- sklearn/compose/tests/test_target.py | 38 ++++++ sklearn/linear_model/stochastic_gradient.py | 2 +- sklearn/metrics/classification.py | 128 +++++++++++------- sklearn/metrics/tests/test_classification.py | 114 +++++++++------- 10 files changed, 229 insertions(+), 138 deletions(-) diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index 4f4fb2b073580..0eaac27699d37 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -242,12 +242,20 @@ The above commands assume that you have the Python installation folder in your PATH environment variable. You will need `Build Tools for Visual Studio 2017 -`_. +`_. + +.. warning:: + You DO NOT need to install Visual Studio 2019. + You only need the "Build Tools for Visual Studio 2019", + under "All downloads" -> "Tools for Visual Studio 2019". For 64-bit Python, configure the build environment with:: SET DISTUTILS_USE_SDK=1 - "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + +Please be aware that the path above might be different from user to user. +The aim is to point to the "vcvarsall.bat" file. And build scikit-learn from this environment:: diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index fc4f173d9a1ec..5f5d0e3d04dc5 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -721,13 +721,6 @@ Support for Python 3.4 and below has been officially dropped. :mod:`sklearn.metrics` ...................... -- |Feature| Added a new parameter ``zero_division`` to multiple classification -metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, -:func:`fbeta_score`, :func:`precision_recall_fscore_support`, -:func:`classification_report`. This allows to set returned value for -ill-defined metrics. - :issue:`14876` by :user:`Marc Torrellas Socastro `. - - |Feature| Added the :func:`metrics.max_error` metric and a corresponding ``'max_error'`` scorer for single output regression. :pr:`12232` by :user:`Krishna Sangeeth `. diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 8987ee1cc359b..0bb06752d7c0f 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -75,6 +75,10 @@ Changelog 1.12. :pr:`14510` by :user:`Guillaume Lemaitre `. +- |Fix| Fixed a bug in :class:`compose.TransformedTargetRegrssor` which did not + pass `**fit_params` to the underlying regressor. + :pr:`14890` by :user:`Miguel Cabrera `. + :mod:`sklearn.datasets` ....................... @@ -219,7 +223,7 @@ Changelog -|FIX| Fixed a bug where :class:`kernel_approximation.Nystroem` raised a `KeyError` when using `kernel="precomputed"`. - :pr:`14706` by :user:`Venkatachalam N `. + :pr:`14706` by :user:`Venkatachalam N `. :mod:`sklearn.linear_model` ........................... @@ -262,6 +266,13 @@ Changelog :mod:`sklearn.metrics` ...................... +- |Feature| Added a new parameter ``zero_division`` to multiple classification + metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, + :func:`fbeta_score`, :func:`precision_recall_fscore_support`, + :func:`classification_report`. 
This allows to set returned value for + ill-defined metrics. + :pr:`14900` by :user:`Marc Torrellas Socastro `. + - |Feature| Added the :func:`metrics.nan_euclidean_distances` metric, which calculates euclidean distances in the presence of missing values. :issue:`12852` by :user:`Ashim Bhattarai ` and @@ -343,19 +354,19 @@ Changelog - |Enhancement| SVM now throws more specific error when fit on non-square data and kernel = precomputed. :class:`svm.BaseLibSVM` :pr:`14336` by :user:`Gregory Dexter `. - + :mod:`sklearn.tree` ................... - |Feature| Adds minimal cost complexity pruning, controlled by ``ccp_alpha``, to :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, :class:`tree.ExtraTreeRegressor`, - :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, - :class:`ensemble.ExtraTreesClassifier`, + :class:`ensemble.ExtraTreesClassifier`, :class:`ensemble.ExtraTreesRegressor`, - :class:`ensemble.RandomTreesEmbedding`, - :class:`ensemble.GradientBoostingClassifier`, + :class:`ensemble.RandomTreesEmbedding`, + :class:`ensemble.GradientBoostingClassifier`, and :class:`ensemble.GradientBoostingRegressor`. :pr:`12887` by `Thomas Fan`_. diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index a2f34b5994b15..0c89a31a64b0b 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -11,16 +11,6 @@ HashingVectorizer that will project each example into the same feature space. This is especially useful in the case of text classification where new features (words) may appear in each batch. - -The dataset used in this example is Reuters-21578 as provided by the UCI ML -repository. It will be automatically downloaded and uncompressed on first run. - -The plot represents the learning curve of the classifier: the evolution -of classification accuracy over the course of the mini-batches. Accuracy is -measured on the first 1000 samples, held out as a validation set. - -To limit the memory consumption, we queue examples up to a fixed amount before -feeding them to the learner. """ # Authors: Eustache Diemert @@ -57,6 +47,10 @@ def _not_in_sphinx(): # Reuters Dataset related routines # -------------------------------- # +# The dataset used in this example is Reuters-21578 as provided by the UCI ML +# repository. It will be automatically downloaded and uncompressed on first +# run. + class ReutersParser(HTMLParser): @@ -320,6 +314,13 @@ def progress(cls_name, stats): ############################################################################### # Plot results # ------------ +# +# The plot represents the learning curve of the classifier: the evolution +# of classification accuracy over the course of the mini-batches. Accuracy is +# measured on the first 1000 samples, held out as a validation set. +# +# To limit the memory consumption, we queue examples up to a fixed amount +# before feeding them to the learner. def plot_accuracy(x, y, x_legend): diff --git a/sklearn/cluster/bicluster.py b/sklearn/cluster/bicluster.py index d0e4aecd5d99c..5bfd335549012 100644 --- a/sklearn/cluster/bicluster.py +++ b/sklearn/cluster/bicluster.py @@ -1,9 +1,7 @@ -"""Spectral biclustering algorithms. 
+"""Spectral biclustering algorithms.""" +# Authors : Kemal Eren +# License: BSD 3 clause -Authors : Kemal Eren -License: BSD 3 clause - -""" from abc import ABCMeta, abstractmethod import numpy as np diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index ce0c76d6486c5..9b94e0275c368 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -148,7 +148,7 @@ def _fit_transformer(self, y): " you are sure you want to proceed regardless" ", set 'check_inverse=False'", UserWarning) - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, **fit_params): """Fit the model according to the given training data. Parameters @@ -160,9 +160,10 @@ def fit(self, X, y, sample_weight=None): y : array-like, shape (n_samples,) Target values. - sample_weight : array-like, shape (n_samples,) optional - Array of weights that are assigned to individual samples. - If not provided, then each sample is given unit weight. + **fit_params : dict of string -> object + Parameters passed to the ``fit`` method of the underlying + regressor. + Returns ------- @@ -197,10 +198,7 @@ def fit(self, X, y, sample_weight=None): else: self.regressor_ = clone(self.regressor) - if sample_weight is None: - self.regressor_.fit(X, y_trans) - else: - self.regressor_.fit(X, y_trans, sample_weight=sample_weight) + self.regressor_.fit(X, y_trans, **fit_params) return self diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py index cab28f406c5f9..77507b4026f2b 100644 --- a/sklearn/compose/tests/test_target.py +++ b/sklearn/compose/tests/test_target.py @@ -14,6 +14,8 @@ from sklearn.preprocessing import FunctionTransformer from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import Pipeline + from sklearn.linear_model import LinearRegression, Lasso from sklearn import datasets @@ -294,3 +296,39 @@ def test_transform_target_regressor_count_fit(check_inverse): ) ttr.fit(X, y) assert ttr.transformer_.fit_counter == 1 + + +class DummyRegressorWithExtraFitParams(DummyRegressor): + def fit(self, X, y, sample_weight=None, check_input=True): + # on the test below we force this to false, we make sure this is + # actually passed to the regressor + assert not check_input + return super().fit(X, y, sample_weight) + + +def test_transform_target_regressor_pass_fit_parameters(): + X, y = friedman + regr = TransformedTargetRegressor( + regressor=DummyRegressorWithExtraFitParams(), + transformer=DummyTransformer() + ) + + regr.fit(X, y, check_input=False) + assert regr.transformer_.fit_counter == 1 + + +def test_transform_target_regressor_route_pipeline(): + X, y = friedman + + regr = TransformedTargetRegressor( + regressor=DummyRegressorWithExtraFitParams(), + transformer=DummyTransformer() + ) + estimators = [ + ('normalize', StandardScaler()), ('est', regr) + ] + + pip = Pipeline(estimators) + pip.fit(X, y, **{'est__check_input': False}) + + assert regr.transformer_.fit_counter == 1 diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 684346a09d1a0..6a11c4a97ee2f 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -124,7 +124,7 @@ def _validate_params(self, for_partial_fit=False): if self.n_iter_no_change < 1: raise ValueError("n_iter_no_change must be >= 1") if not (0.0 < self.validation_fraction < 1.0): - raise ValueError("validation_fraction must be in ]0, 1[") + raise ValueError("validation_fraction must be in range (0, 1)") if 
self.learning_rate in ("constant", "invscaling", "adaptive"): if self.eta0 <= 0.0: raise ValueError("eta0 must be > 0") diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index cf1307d252819..166e8a029aa1d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1028,7 +1028,7 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when both precision and recall are zero (calculated using the same value for this parameter). @@ -1068,6 +1068,13 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', >>> f1_score(y_true, y_pred, zero_division=1) 1.0... + Notes + ----- + When ``true positive + false positive == 0``, precision is undefined; + When ``true positive + false negative == 0``, recall is undefined. + In such cases, by default the metric will be set to 0, as will f-score, + and ``UndefinedMetricWarning`` will be raised. This behavior can be + modified with ``zero_division``. """ return fbeta_score(y_true, y_pred, 1, labels=labels, pos_label=pos_label, average=average, @@ -1146,7 +1153,7 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when both precision and recall are zero (calculated using the same value for this parameter). @@ -1185,7 +1192,14 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, >>> fbeta_score(y_true, y_pred, average=None, beta=0.5) array([0.71..., 0. , 0. ]) + Notes + ----- + When ``true positive + false positive == 0`` or + ``true positive + false negative == 0``, f-score returns 0 and raises + ``UndefinedMetricWarning``. This behavior can be + modified with ``zero_division``. """ + _, _, f, _ = precision_recall_fscore_support(y_true, y_pred, beta=beta, labels=labels, @@ -1198,7 +1212,7 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, def _prf_divide(numerator, denominator, metric, - modifier, average, warn_for, zero_division): + modifier, average, warn_for, zero_division="warn"): """Performs division and handles divide-by-zero. On zero-division, sets the corresponding result elements equal to @@ -1208,31 +1222,27 @@ def _prf_divide(numerator, denominator, metric, The metric, modifier and average arguments are used only for determining an appropriate warning. """ - # TODO: check new - # mask = denominator == 0.0 - # denominator = denominator.copy() - # denominator[mask] = 1 # avoid infs/nans - # result = numerator / denominator - result = numerator / denominator mask = denominator == 0.0 + denominator = denominator.copy() + denominator[mask] = 1 # avoid infs/nans + result = numerator / denominator + if not np.any(mask): return result - # remove infs + # if ``zero_division=1``, set those with denominator == 0 equal to 1 result[mask] = float(zero_division == 1) # the user will be removing warnings if zero_division is set to something - # different than its default value - if zero_division != "warn": + # different than its default value. 
If we are computing only f-score + # the warning will be raised only if precision and recall are ill-defined + if zero_division != "warn" or metric not in warn_for: return result # build appropriate warning # E.g. "Precision and F-score are ill-defined and being set to 0.0 in - # labels with no predicted samples" - axis0 = 'sample' - axis1 = 'label' - if average == 'samples': - axis0, axis1 = axis1, axis0 + # labels with no predicted samples. Use ``zero_division`` parameter to + # control this behavior." if metric in warn_for and 'f-score' in warn_for: msg_start = '{0} and F-score are'.format(metric.title()) @@ -1243,14 +1253,25 @@ def _prf_divide(numerator, denominator, metric, else: return result + msg = _build_prf_warning_message(average, modifier, msg_start, len(result)) + + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) + return result + + +def _build_prf_warning_message(average, modifier, msg_start, result_size): + axis0 = 'sample' + axis1 = 'label' + if average == 'samples': + axis0, axis1 = axis1, axis0 msg = ('{0} ill-defined and being set to 0.0 {{0}} ' - 'no {1} {2}s.'.format(msg_start, modifier, axis0)) - if len(mask) == 1: + 'no {1} {2}s. Use ``zero_division`` parameter to control' + ' this behavior.'.format(msg_start, modifier, axis0)) + if result_size == 1: msg = msg.format('due to') else: msg = msg.format('in {0}s with'.format(axis1)) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) - return result + return msg def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label): @@ -1376,7 +1397,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 for precision, recall, and F-measure when their computation implies a zero division. If @@ -1431,13 +1452,14 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, (array([0. , 0. , 0.66...]), array([0., 0., 1.]), array([0. , 0. , 0.8]), array([2, 2, 2])) - TODO: add all these Notes and add Unless zero_division... + Notes ----- When ``true positive + false positive == 0``, precision is undefined; When ``true positive + false negative == 0``, recall is undefined. - In such cases, the metric will be set to 0, as will f-score, and - ``UndefinedMetricWarning`` will be raised. + In such cases, by default the metric will be set to 0, as will f-score, + and ``UndefinedMetricWarning`` will be raised. This behavior can be + modified with ``zero_division``. """ _check_zero_division(zero_division) if beta < 0: @@ -1462,31 +1484,31 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # Finally, we have all our sufficient statistics. Divide! 
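    # Editor's note (illustrative, not in the original patch): a tiny worked
    # case for the divisions below. With a single binary label and
    # tp_sum=[0], pred_sum=[0], true_sum=[2], precision is ill-defined (0/0)
    # and _prf_divide pins it to the ``zero_division`` value, while
    # recall = 0/2 = 0. The f-score denominator beta2 * precision + recall
    # can then be 0; it is replaced by 1 further down so the F-score comes
    # out as 0 rather than NaN.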
# beta2 = beta ** 2 - # Divide, and on zero-division, set scores to 0 and warn: - + # Divide, and on zero-division, set scores and/or warn according to + # zero_division: precision = _prf_divide(tp_sum, pred_sum, 'precision', 'predicted', average, warn_for, zero_division) recall = _prf_divide(tp_sum, true_sum, 'recall', 'true', average, warn_for, zero_division) + # warn for f-score only if zero_division is warn, it is in warn_for + # and BOTH prec and rec are ill-defined + if zero_division == "warn" and ("f-score",) == warn_for: + if (pred_sum[true_sum == 0] == 0).any(): + msg = _build_prf_warning_message( + average, "true nor predicted", 'F-score is', len(true_sum) + ) + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) - # TODO: check new version - - # if np.isposinf(beta): - # f_score = recall - # else: - # # Don't need to warn for F: either P or R warned, or tp == 0 where pos - # # and true are nonzero, in which case, F is well-defined and zero - # denom = beta2 * precision + recall - # denom[denom == 0.] = 1 # avoid division by 0 - # f_score = (1 + beta2) * precision * recall / denom + # if tp == 0 F will be 1 only if all predictions are zero, all labels are + # zero, and zero_division=1. In all other case, 0 + if np.isposinf(beta): + f_score = recall + else: + denom = beta2 * precision + recall - # Don't need to warn for F: either P or R warned, or tp == 0 where pos - # and true are nonzero, in which case, F is well-defined and zero - f_score = ((1 + beta2) * precision * recall / - (beta2 * precision + recall)) - f_score[tp_sum == 0] = 0.0 - f_score[(true_sum == 0) & (pred_sum == 0)] = float(zero_division == 1) + denom[denom == 0.] = 1 # avoid division by 0 + f_score = (1 + beta2) * precision * recall / denom # Average the results if average == 'weighted': @@ -1584,7 +1606,7 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when there are no positive predictions. If ``zero_division != "warn"``, warnings are suppressed @@ -1614,15 +1636,16 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, >>> precision_score(y_true, y_pred, average=None) array([0.66..., 0. , 0. ]) >>> y_pred = [0, 0, 0, 0, 0, 0] - >>> precision_score(y_true, y_pred) # doctest: +ELLIPSIS + >>> precision_score(y_true, y_pred) 0.0... - >>> precision_score(y_true, y_pred, zero_division=1) # doctest: +ELLIPSIS + >>> precision_score(y_true, y_pred, zero_division=1) 1.0... - TODO + Notes ----- When ``true positive + false positive == 0``, precision returns 0 and - raises ``UndefinedMetricWarning``. + raises ``UndefinedMetricWarning``. This behavior can be + modified with ``zero_division``. """ p, _, _, _ = precision_recall_fscore_support(y_true, y_pred, @@ -1701,7 +1724,7 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when there are no positive labels. If ``zero_division != "warn"``, warnings are suppressed @@ -1736,11 +1759,12 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.0... 
>>> recall_score(y_true, y_pred, zero_division=1) 1.0... - TODO + Notes ----- When ``true positive + false negative == 0``, recall returns 0 and raises - ``UndefinedMetricWarning``. + ``UndefinedMetricWarning``. This behavior can be modified with + ``zero_division``. """ _, r, _, _ = precision_recall_fscore_support(y_true, y_pred, labels=labels, @@ -1861,7 +1885,7 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, output_dict : bool (default = False) If True, return output as dict - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 for precision, recall, and f1 when their computation implies a zero division. If ``zero_division != "warn"``, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 06d08b4a25c82..e04cb7f91dc62 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -15,7 +15,6 @@ from sklearn.utils.validation import check_random_state from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal @@ -287,7 +286,7 @@ def test_precision_recall_f_ignored_labels(): # ensure the above were meaningful tests: for average in ['macro', 'weighted', 'micro']: assert (recall_13(average=average) != - recall_all(average=average)) + recall_all(average=average)) def test_average_precision_score_score_non_binary_class(): @@ -1533,7 +1532,7 @@ def test_precision_recall_f1_no_labels(beta, average, zero_division): assert_almost_equal(p, zero_division) assert_almost_equal(r, zero_division) assert_almost_equal(f, zero_division) - assert_equal(s, None) + assert s is None assert_almost_equal(fbeta, float(zero_division == 1)) @@ -1591,27 +1590,35 @@ def test_prf_warnings(zero_division): for average in [None, 'weighted', 'macro']: msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 in labels with no predicted samples.') + 'being set to 0.0 in labels with no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [0, 1, 2], [1, 1, 2], average=average, zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 in labels with no true samples.') + 'being set to 0.0 in labels with no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [1, 1, 2], [0, 1, 2], average=average, zero_division=zero_division) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 in samples with no predicted labels.') + 'being set to 0.0 in samples with no predicted labels.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[1, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), average='samples', zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 in samples with no true labels.') + 'being set to 0.0 in samples with no true labels.' 
+ ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[1, 0], [0, 0]]), np.array([[1, 0], [1, 0]]), @@ -1619,14 +1626,18 @@ def test_prf_warnings(zero_division): # single score: micro-average msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.') + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro', @@ -1634,26 +1645,38 @@ def test_prf_warnings(zero_division): # single positive label msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [1, 1], [-1, -1], average='binary', zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.') + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [-1, -1], [1, 1], average='binary', zero_division=zero_division) - # TODO + with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - precision_recall_fscore_support([0, 0], [0, 0], average="binary") - msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.') - assert str(record.pop().message) == msg - msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.') - assert str(record.pop().message) == msg + precision_recall_fscore_support([0, 0], [0, 0], average="binary", + zero_division=zero_division) + if zero_division == "warn": + msg = ('Recall and F-score are ill-defined and ' + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + msg = ('Precision and F-score are ill-defined and ' + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + else: + assert len(record) == 0 @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) @@ -1668,9 +1691,11 @@ def test_recall_warnings(zero_division): np.array([[1, 1], [1, 1]]), average='micro', zero_division=zero_division) if zero_division == "warn": - assert_equal(str(record.pop().message), - 'Recall is ill-defined and ' - 'being set to 0.0 due to no true samples.') + assert (str(record.pop().message) == + 'Recall is ill-defined and ' + 'being set to 0.0 due to no true samples.' 
+ ' Use ``zero_division`` parameter to control' + ' this behavior.') else: assert len(record) == 0 @@ -1678,21 +1703,24 @@ def test_recall_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Recall is ill-defined and ' - 'being set to 0.0 due to no true samples.') + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) def test_precision_warnings(zero_division): with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - precision_score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) if zero_division == "warn": assert (str(record.pop().message) == 'Precision is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') else: assert len(record) == 0 @@ -1700,7 +1728,9 @@ def test_precision_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Precision is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') assert_no_warnings(precision_score, np.array([[0, 0], [0, 0]]), @@ -1717,36 +1747,26 @@ def test_fscore_warnings(zero_division): score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) - if zero_division == "warn": - assert (str(record.pop().message) == - 'F-score is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') - else: - assert len(record) == 0 + assert len(record) == 0 score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro', zero_division=zero_division) - if zero_division == "warn": - assert_equal(str(record.pop().message), - 'F-score is ill-defined and ' - 'being set to 0.0 due to no true samples.') - else: - assert len(record) == 0 + assert len(record) == 0 - score([0, 0], [0, 0]) + score(np.array([[0, 0], [0, 0]]), + np.array([[0, 0], [0, 0]]), + average='micro', zero_division=zero_division) if zero_division == "warn": assert (str(record.pop().message) == 'F-score is ill-defined and ' - 'being set to 0.0 due to no true samples.') - assert (str(record.pop().message) == - 'F-score is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no true nor predicted ' + 'samples. 
Use ``zero_division`` parameter to ' + 'control this behavior.') else: assert len(record) == 0 - def test_prf_average_binary_data_non_binary(): # Error if user does not explicitly set non-binary average mode y_true_mc = [1, 2, 3, 3] @@ -1902,7 +1922,7 @@ def test_hinge_loss_multiclass(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_missing_labels_with_labels_none(): @@ -1940,7 +1960,7 @@ def test_hinge_loss_multiclass_with_missing_labels(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision, labels=labels) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_invariance_lists(): @@ -1967,7 +1987,7 @@ def test_hinge_loss_multiclass_invariance_lists(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + dummy_hinge_loss) def test_log_loss(): From 29d1109c7461a3791be5030718b5802cec54e4a4 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 13:31:54 +0100 Subject: [PATCH 03/10] fixed "[...0 0...]" --> "[...0, 0...]" in docstring --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 166e8a029aa1d..aceb7e7b208c9 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1063,7 +1063,7 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.26... >>> f1_score(y_true, y_pred, average=None) array([0.8, 0. , 0. ]) - >>> y_true = [0, 0, 0 0, 0, 0] + >>> y_true = [0, 0, 0, 0, 0, 0] >>> y_pred = [0, 0, 0, 0, 0, 0] >>> f1_score(y_true, y_pred, zero_division=1) 1.0... From f4e85e69967d57a353c3a7ca35857ee08ba4ac37 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 13:47:09 +0100 Subject: [PATCH 04/10] corrected docstring examples --- sklearn/metrics/classification.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index aceb7e7b208c9..8e27f265e76a1 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1636,10 +1636,10 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, >>> precision_score(y_true, y_pred, average=None) array([0.66..., 0. , 0. ]) >>> y_pred = [0, 0, 0, 0, 0, 0] - >>> precision_score(y_true, y_pred) - 0.0... + >>> precision_score(y_true, y_pred, average=None) + array([0.33..., 0. , 0. ]) >>> precision_score(y_true, y_pred, zero_division=1) - 1.0... + array([0.33..., 1. , 1. ]) Notes ----- @@ -1755,10 +1755,10 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', >>> recall_score(y_true, y_pred, average=None) array([1., 0., 0.]) >>> y_true = [0, 0, 0, 0, 0, 0] - >>> recall_score(y_true, y_pred) - 0.0... - >>> recall_score(y_true, y_pred, zero_division=1) - 1.0... 
+ >>> recall_score(y_true, y_pred, average=None) + array([0.5, 0., 0.]) + >>> recall_score(y_true, y_pred, average=None, zero_division=1) + array([0.5, 1., 1.]) Notes ----- From 446f878fe4330facdb620996384fb7f45be57951 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 14:18:05 +0100 Subject: [PATCH 05/10] corrected docstring examples (again) --- sklearn/metrics/classification.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 8e27f265e76a1..85b7a4de862a5 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1638,7 +1638,7 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, >>> y_pred = [0, 0, 0, 0, 0, 0] >>> precision_score(y_true, y_pred, average=None) array([0.33..., 0. , 0. ]) - >>> precision_score(y_true, y_pred, zero_division=1) + >>> precision_score(y_true, y_pred, average=None, zero_division=1) array([0.33..., 1. , 1. ]) Notes @@ -1756,9 +1756,9 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', array([1., 0., 0.]) >>> y_true = [0, 0, 0, 0, 0, 0] >>> recall_score(y_true, y_pred, average=None) - array([0.5, 0., 0.]) + array([0.5, 0. , 0. ]) >>> recall_score(y_true, y_pred, average=None, zero_division=1) - array([0.5, 1., 1.]) + array([0.5, 1. , 1. ]) Notes ----- From e189423d32f793bdfbe67c9483b45096f8f4d123 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Wed, 11 Sep 2019 19:55:31 +0100 Subject: [PATCH 06/10] - tests for warn and [0,1] separated to make them more clear; - better docstrings - more explicit use of zero_division value --- sklearn/metrics/classification.py | 60 +++--- sklearn/metrics/tests/test_classification.py | 214 ++++++++++++------- 2 files changed, 167 insertions(+), 107 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 85b7a4de862a5..9ced21519dd5d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -43,13 +43,13 @@ def _check_zero_division(zero_division): if isinstance(zero_division, str): - if zero_division != "warn": - raise ValueError('zero_division must be one of ["warn", 0, 1]') + if zero_division == "warn": + return elif isinstance(zero_division, (int, float)): - if zero_division not in [0, 1]: - raise ValueError('zero_division must be one of ["warn", 0, 1]') - else: - raise TypeError('zero_division must be one of ["warn", 0, 1]') + if zero_division in [0, 1]: + return + raise ValueError(f'Got zero_division={zero_division}.' + f' Must be one of ["warn", 0, 1]') def _check_targets(y_true, y_pred): @@ -1029,10 +1029,9 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 when both precision and recall are zero - (calculated using the same value for this parameter). - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division, i.e. when all + predictions and labels are negative. If set to "warn", this acts as 0, + but warnings are also raised. Returns ------- @@ -1154,10 +1153,9 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. 
If set to - ("warn"|0)/1, returns 0/1 when both precision and recall are zero - (calculated using the same value for this parameter). - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division, i.e. when all + predictions and labels are negative. If set to "warn", this acts as 0, + but warnings are also raised. Returns ------- @@ -1231,7 +1229,7 @@ def _prf_divide(numerator, denominator, metric, return result # if ``zero_division=1``, set those with denominator == 0 equal to 1 - result[mask] = float(zero_division == 1) + result[mask] = 0.0 if zero_division in ["warn", 0] else 1.0 # the user will be removing warnings if zero_division is set to something # different than its default value. If we are computing only f-score @@ -1398,10 +1396,11 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 for precision, recall, and F-measure when - their computation implies a zero division. If - ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division: + - recall: when there are no positive labels + - precision: when there are no positive predictions + - f-score: both + If set to "warn", this acts as 0, but warnings are also raised. Returns ------- @@ -1514,13 +1513,14 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, if average == 'weighted': weights = true_sum if weights.sum() == 0: + zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 # precision is zero_division if there are no positive predictions # recall is zero_division if there are no positive labels # fscore is zero_division if all labels AND predictions are # negative - return (float(zero_division == 1) if pred_sum.sum() == 0 else 0, - float(zero_division == 1), - float(zero_division == 1) if pred_sum.sum() == 0 else 0, + return (zero_division_value if pred_sum.sum() == 0 else 0, + zero_division_value, + zero_division_value if pred_sum.sum() == 0 else 0, None) elif average == 'samples': @@ -1607,9 +1607,8 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 when there are no positive predictions. - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. Returns ------- @@ -1725,9 +1724,8 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 when there are no positive labels. - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. Returns ------- @@ -1886,10 +1884,8 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, If True, return output as dict zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 for precision, recall, and f1 when their - computation implies a zero division. 
If ``zero_division != "warn"``, - warnings are suppressed + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. Returns ------- diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e04cb7f91dc62..21cff91b29827 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1511,38 +1511,50 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): @pytest.mark.parametrize('beta', [1]) @pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +@pytest.mark.parametrize('zero_division', [0, 1]) def test_precision_recall_f1_no_labels(beta, average, zero_division): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) - func = precision_recall_fscore_support - my_assert = (assert_warns if zero_division == "warn" - else assert_no_warnings) - tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" - else [func]) - p, r, f, s = my_assert(*tmp, y_true, y_pred, average=average, - beta=beta, zero_division=zero_division) - tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" - else [fbeta_score]) - fbeta = my_assert(*tmp, y_true, y_pred, beta=beta, - average=average, zero_division=zero_division) - - zero_division = float(zero_division == 1) + p, r, f, s = assert_no_warnings(precision_recall_fscore_support, y_true, + y_pred, average=average, beta=beta, + zero_division=zero_division) + fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=beta, + average=average, zero_division=zero_division) + + zero_division = float(zero_division) assert_almost_equal(p, zero_division) assert_almost_equal(r, zero_division) assert_almost_equal(f, zero_division) assert s is None - assert_almost_equal(fbeta, float(zero_division == 1)) + assert_almost_equal(fbeta, float(zero_division)) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) -def test_precision_recall_f1_no_labels_average_none(zero_division): +@pytest.mark.parametrize('beta', [1]) +@pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) +def test_precision_recall_f1_no_labels_warn(beta, average): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) - beta = 1 + func = precision_recall_fscore_support + p, r, f, s = assert_warns(UndefinedMetricWarning, func, y_true, y_pred, + average=average, beta=beta) + fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, + average=average, beta=beta) + + assert_almost_equal(p, 0) + assert_almost_equal(r, 0) + assert_almost_equal(f, 0) + assert s is None + + assert_almost_equal(fbeta, 0) + + +@pytest.mark.parametrize('zero_division', [0, 1]) +def test_precision_recall_f1_no_labels_average_none(zero_division): + y_true = np.zeros((20, 3)) + y_pred = np.zeros_like(y_true) # tp = [0, 0, 0] # fn = [0, 0, 0] @@ -1552,19 +1564,14 @@ def test_precision_recall_f1_no_labels_average_none(zero_division): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - func = precision_recall_fscore_support - my_assert = (assert_warns if zero_division == "warn" - else assert_no_warnings) - tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" - else [func]) - p, r, f, s = my_assert(*tmp, y_true, y_pred, average=None, - beta=beta, zero_division=zero_division) - tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" - else [fbeta_score]) - fbeta = my_assert(*tmp, y_true, y_pred, 
beta=beta, - average=None, zero_division=zero_division) - - zero_division = float(zero_division == 1) + p, r, f, s = assert_no_warnings(precision_recall_fscore_support, + y_true, y_pred, + average=None, beta=1, + zero_division=zero_division) + fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=1, + average=None, zero_division=zero_division) + + zero_division = float(zero_division) assert_array_almost_equal( p, [zero_division, zero_division, zero_division], 2 ) @@ -1581,102 +1588,159 @@ def test_precision_recall_f1_no_labels_average_none(zero_division): ) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_no_labels_average_none_warn(): + y_true = np.zeros((20, 3)) + y_pred = np.zeros_like(y_true) + + # tp = [0, 0, 0] + # fn = [0, 0, 0] + # fp = [0, 0, 0] + # support = [0, 0, 0] + # |y_hat_i inter y_i | = [0, 0, 0] + # |y_i| = [0, 0, 0] + # |y_hat_i| = [0, 0, 0] + + p, r, f, s = assert_warns(UndefinedMetricWarning, + precision_recall_fscore_support, + y_true, y_pred, average=None, beta=1) + fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, + beta=1, average=None) + + assert_array_almost_equal(p, [0, 0, 0], 2) + assert_array_almost_equal(r, [0, 0, 0], 2) + assert_array_almost_equal(f, [0, 0, 0], 2) + assert_array_almost_equal(s, [0, 0, 0], 2) + + assert_array_almost_equal(fbeta, [0, 0, 0], 2) + + +@pytest.mark.parametrize('zero_division', ["warn"]) def test_prf_warnings(zero_division): # average of per-label scores f, w = precision_recall_fscore_support, UndefinedMetricWarning - my_assert = (assert_warns_message - if zero_division == "warn" else assert_no_warnings) for average in [None, 'weighted', 'macro']: msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in labels with no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [0, 1, 2], [1, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average, + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [1, 1, 2], [0, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average, + zero_division=zero_division) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in samples with no predicted labels.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[1, 0], [1, 0]]), - np.array([[1, 0], [0, 0]]), average='samples', - zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[1, 0], [1, 0]]), + np.array([[1, 0], [0, 0]]), average='samples', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.' 
' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[1, 0], [0, 0]]), - np.array([[1, 0], [1, 0]]), - average='samples', zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[1, 0], [0, 0]]), + np.array([[1, 0], [1, 0]]), + average='samples', zero_division=zero_division) # single score: micro-average msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), average='micro', - zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), average='micro', - zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', + zero_division=zero_division) # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [1, 1], [-1, -1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [-1, -1], [1, 1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary', + zero_division=zero_division) with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') precision_recall_fscore_support([0, 0], [0, 0], average="binary", zero_division=zero_division) - if zero_division == "warn": - msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' - ' this behavior.') - assert str(record.pop().message) == msg - msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' - ' this behavior.') - assert str(record.pop().message) == msg - else: - assert len(record) == 0 + msg = ('Recall and F-score are ill-defined and ' + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + msg = ('Precision and F-score are ill-defined and ' + 'being set to 0.0 due to no predicted samples.' 
+ ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + + +@pytest.mark.parametrize('zero_division', [0, 1]) +def test_prf_no_warnings_if_zero_division_set(zero_division): + # average of per-label scores + f = precision_recall_fscore_support + for average in [None, 'weighted', 'macro']: + + assert_no_warnings(f, [0, 1, 2], [1, 1, 2], average=average, + zero_division=zero_division) + + assert_no_warnings(f, [1, 1, 2], [0, 1, 2], average=average, + zero_division=zero_division) + + # average of per-sample scores + assert_no_warnings(f, np.array([[1, 0], [1, 0]]), + np.array([[1, 0], [0, 0]]), average='samples', + zero_division=zero_division) + + assert_no_warnings(f, np.array([[1, 0], [0, 0]]), + np.array([[1, 0], [1, 0]]), + average='samples', zero_division=zero_division) + + # single score: micro-average + assert_no_warnings(f, np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', + zero_division=zero_division) + + assert_no_warnings(f, np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', + zero_division=zero_division) + + # single positive label + assert_no_warnings(f, [1, 1], [-1, -1], average='binary', + zero_division=zero_division) + + assert_no_warnings(f, [-1, -1], [1, 1], average='binary', + zero_division=zero_division) + + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter('always') + precision_recall_fscore_support([0, 0], [0, 0], average="binary", + zero_division=zero_division) + assert len(record) == 0 @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) From 3befe311e15d2da34e14ace8feaa7e1582498baf Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Tue, 24 Sep 2019 09:06:12 +0100 Subject: [PATCH 07/10] - removed fstring to make compatible with python<3.6 --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index b9bf84a9409b8..1120fbd593fe4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -48,8 +48,8 @@ def _check_zero_division(zero_division): elif isinstance(zero_division, (int, float)): if zero_division in [0, 1]: return - raise ValueError(f'Got zero_division={zero_division}.' - f' Must be one of ["warn", 0, 1]') + raise ValueError('Got zero_division={0}.' + ' Must be one of ["warn", 0, 1]'.format(zero_division)) def _check_targets(y_true, y_pred): From 7a2bc7db466ffec6f6f2338a17a995b10e7a221b Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Wed, 25 Sep 2019 09:17:14 +0100 Subject: [PATCH 08/10] - reverted changes to avoid flake8 warnings - added tests for YTN or YPN to check prec/rec with zero_division value - cleaner tests --- sklearn/metrics/classification.py | 3 +- sklearn/metrics/tests/test_classification.py | 96 ++++++++++---------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 1120fbd593fe4..5a5f75dffc37b 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2176,8 +2176,7 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. 
- y_pred : array-like of float, shape = (n_samples, n_classes) or - (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 42b5fa7867388..9a093003e9983 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -534,12 +534,10 @@ def test_cohen_kappa(): y1 = np.array([0] * 46 + [1] * 44 + [2] * 10) y2 = np.array([0] * 50 + [1] * 40 + [2] * 10) assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4) - assert_almost_equal( - cohen_kappa_score(y1, y2, weights="linear"), 0.9412, decimal=4 - ) - assert_almost_equal( - cohen_kappa_score(y1, y2, weights="quadratic"), 0.9541, decimal=4 - ) + assert_almost_equal(cohen_kappa_score(y1, y2, + weights="linear"), 0.9412, decimal=4) + assert_almost_equal(cohen_kappa_score(y1, y2, + weights="quadratic"), 0.9541, decimal=4) @ignore_warnings @@ -1452,28 +1450,33 @@ def test_precision_recall_f1_score_multilabel_2(): @ignore_warnings -def test_precision_recall_f1_score_with_an_empty_prediction(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_score_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) # true_pos = [ 0. 1. 1. 0.] # false_pos = [ 0. 0. 0. 1.] # false_neg = [ 1. 1. 0. 0.] + zero_division = 1.0 if zero_division == 1.0 else 0.0 p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average=None) - assert_array_almost_equal(p, [0.0, 1.0, 1.0, 0.0], 2) - assert_array_almost_equal(r, [0.0, 0.5, 1.0, 0.0], 2) + average=None, + zero_division=zero_division) + assert_array_almost_equal(p, [zero_division, 1.0, 1.0, 0.0], 2) + assert_array_almost_equal(r, [0.0, 0.5, 1.0, zero_division], 2) assert_array_almost_equal(f, [0.0, 1 / 1.5, 1, 0.0], 2) assert_array_almost_equal(s, [1, 2, 1, 0], 2) - f2 = fbeta_score(y_true, y_pred, beta=2, average=None) + f2 = fbeta_score(y_true, y_pred, beta=2, average=None, + zero_division=zero_division) support = s assert_array_almost_equal(f2, [0, 0.55, 1, 0], 2) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average="macro") - assert_almost_equal(p, 0.5) - assert_almost_equal(r, 1.5 / 4) + average="macro", + zero_division=zero_division) + assert_almost_equal(p, (2 + zero_division) / 4) + assert_almost_equal(r, (1.5 + zero_division) / 4) assert_almost_equal(f, 2.5 / (4 * 1.5)) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, @@ -1481,24 +1484,29 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): np.mean(f2)) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average="micro") + average="micro", + zero_division=zero_division) assert_almost_equal(p, 2 / 3) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5)) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, - average="micro"), + average="micro", + zero_division=zero_division), (1 + 4) * p * r / (4 * p + r)) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average="weighted") - assert_almost_equal(p, 3 / 4) + average="weighted", + zero_division=zero_division) + assert_almost_equal(p, 3 / 4 if zero_division == 0 else 1.0) 
assert_almost_equal(r, 0.5) assert_almost_equal(f, (2 / 1.5 + 1) / 4) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, - average="weighted"), - np.average(f2, weights=support)) + average="weighted", + zero_division=zero_division), + np.average(f2, weights=support), + ) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average="samples") @@ -1510,7 +1518,8 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(f, 1 / 3) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, - average="samples"), + average="samples", + zero_division=zero_division), 0.333, 2) @@ -1536,17 +1545,16 @@ def test_precision_recall_f1_no_labels(beta, average, zero_division): assert_almost_equal(fbeta, float(zero_division)) -@pytest.mark.parametrize('beta', [1]) @pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) -def test_precision_recall_f1_no_labels_warn(beta, average): +def test_precision_recall_f1_no_labels_check_warnings(average): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) func = precision_recall_fscore_support p, r, f, s = assert_warns(UndefinedMetricWarning, func, y_true, y_pred, - average=average, beta=beta) + average=average, beta=1.0) fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, - average=average, beta=beta) + average=average, beta=1.0) assert_almost_equal(p, 0) assert_almost_equal(r, 0) @@ -1571,9 +1579,9 @@ def test_precision_recall_f1_no_labels_average_none(zero_division): p, r, f, s = assert_no_warnings(precision_recall_fscore_support, y_true, y_pred, - average=None, beta=1, + average=None, beta=1.0, zero_division=zero_division) - fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=1, + fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=1.0, average=None, zero_division=zero_division) zero_division = float(zero_division) @@ -1619,8 +1627,7 @@ def test_precision_recall_f1_no_labels_average_none_warn(): assert_array_almost_equal(fbeta, [0, 0, 0], 2) -@pytest.mark.parametrize('zero_division', ["warn"]) -def test_prf_warnings(zero_division): +def test_prf_warnings(): # average of per-label scores f, w = precision_recall_fscore_support, UndefinedMetricWarning for average in [None, 'weighted', 'macro']: @@ -1629,15 +1636,13 @@ def test_prf_warnings(zero_division): 'being set to 0.0 in labels with no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' @@ -1645,16 +1650,14 @@ def test_prf_warnings(zero_division): ' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [1, 0]]), - np.array([[1, 0], [0, 0]]), average='samples', - zero_division=zero_division) + np.array([[1, 0], [0, 0]]), average='samples') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.' 
' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [0, 0]]), - np.array([[1, 0], [1, 0]]), - average='samples', zero_division=zero_division) + np.array([[1, 0], [1, 0]]), average='samples') # single score: micro-average msg = ('Precision and F-score are ill-defined and ' @@ -1662,36 +1665,31 @@ def test_prf_warnings(zero_division): ' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), average='micro', - zero_division=zero_division) + np.array([[0, 0], [0, 0]]), average='micro') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), average='micro', - zero_division=zero_division) + np.array([[1, 1], [1, 1]]), average='micro') # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary') with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - precision_recall_fscore_support([0, 0], [0, 0], average="binary", - zero_division=zero_division) + precision_recall_fscore_support([0, 0], [0, 0], average="binary") msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' 
' Use ``zero_division`` parameter to control' @@ -1993,7 +1991,7 @@ def test_hinge_loss_multiclass(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_missing_labels_with_labels_none(): @@ -2030,7 +2028,7 @@ def test_hinge_loss_multiclass_with_missing_labels(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision, labels=labels) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_invariance_lists(): From 78911568a66a8cf065012c9790321c0ff3b5b8d0 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Wed, 25 Sep 2019 22:12:28 +0100 Subject: [PATCH 09/10] small refactoring requested by @thomasjpfan --- sklearn/metrics/classification.py | 25 +++++++--------- sklearn/metrics/tests/test_classification.py | 30 ++++++++++---------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 5a5f75dffc37b..fdbfd52425a41 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -42,12 +42,10 @@ def _check_zero_division(zero_division): - if isinstance(zero_division, str): - if zero_division == "warn": - return - elif isinstance(zero_division, (int, float)): - if zero_division in [0, 1]: - return + if isinstance(zero_division, str) and zero_division == "warn": + return + elif isinstance(zero_division, (int, float)) and zero_division in [0, 1]: + return raise ValueError('Got zero_division={0}.' ' Must be one of ["warn", 0, 1]'.format(zero_division)) @@ -1251,25 +1249,23 @@ def _prf_divide(numerator, denominator, metric, else: return result - msg = _build_prf_warning_message(average, modifier, msg_start, len(result)) + _warn_prf(average, modifier, msg_start, len(result)) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return result -def _build_prf_warning_message(average, modifier, msg_start, result_size): - axis0 = 'sample' - axis1 = 'label' +def _warn_prf(average, modifier, msg_start, result_size): + axis0, axis1 = 'sample', 'label' if average == 'samples': axis0, axis1 = axis1, axis0 msg = ('{0} ill-defined and being set to 0.0 {{0}} ' - 'no {1} {2}s. Use ``zero_division`` parameter to control' + 'no {1} {2}s. Use `zero_division` parameter to control' ' this behavior.'.format(msg_start, modifier, axis0)) if result_size == 1: msg = msg.format('due to') else: msg = msg.format('in {0}s with'.format(axis1)) - return msg + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label): @@ -1494,10 +1490,9 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # and BOTH prec and rec are ill-defined if zero_division == "warn" and ("f-score",) == warn_for: if (pred_sum[true_sum == 0] == 0).any(): - msg = _build_prf_warning_message( + _warn_prf( average, "true nor predicted", 'F-score is', len(true_sum) ) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) # if tp == 0 F will be 1 only if all predictions are zero, all labels are # zero, and zero_division=1. 
In all other case, 0 diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 9a093003e9983..29eb44d149194 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1634,27 +1634,27 @@ def test_prf_warnings(): msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in labels with no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in samples with no predicted labels.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), average='samples') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [0, 0]]), np.array([[1, 0], [1, 0]]), average='samples') @@ -1662,14 +1662,14 @@ def test_prf_warnings(): # single score: micro-average msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro') @@ -1677,13 +1677,13 @@ def test_prf_warnings(): # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary') @@ -1692,12 +1692,12 @@ def test_prf_warnings(): precision_recall_fscore_support([0, 0], [0, 0], average="binary") msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert str(record.pop().message) == msg msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' 
- ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert str(record.pop().message) == msg @@ -1761,7 +1761,7 @@ def test_recall_warnings(zero_division): assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') else: assert len(record) == 0 @@ -1771,7 +1771,7 @@ def test_recall_warnings(zero_division): assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') @@ -1786,7 +1786,7 @@ def test_precision_warnings(zero_division): assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') else: assert len(record) == 0 @@ -1796,7 +1796,7 @@ def test_precision_warnings(zero_division): assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_no_warnings(precision_score, @@ -1828,7 +1828,7 @@ def test_fscore_warnings(zero_division): assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no true nor predicted ' - 'samples. Use ``zero_division`` parameter to ' + 'samples. Use `zero_division` parameter to ' 'control this behavior.') else: assert len(record) == 0 From 3d3760e54ef0363aef44537d7ac092bc2c3dd1cd Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Thu, 10 Oct 2019 23:58:18 +0100 Subject: [PATCH 10/10] assert_warns --> pytest.warns --- sklearn/metrics/tests/test_classification.py | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 29eb44d149194..f668b253b553b 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1551,16 +1551,17 @@ def test_precision_recall_f1_no_labels_check_warnings(average): y_pred = np.zeros_like(y_true) func = precision_recall_fscore_support - p, r, f, s = assert_warns(UndefinedMetricWarning, func, y_true, y_pred, - average=average, beta=1.0) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, - average=average, beta=1.0) + with pytest.warns(UndefinedMetricWarning): + p, r, f, s = func(y_true, y_pred, average=average, beta=1.0) assert_almost_equal(p, 0) assert_almost_equal(r, 0) assert_almost_equal(f, 0) assert s is None + with pytest.warns(UndefinedMetricWarning): + fbeta = fbeta_score(y_true, y_pred, average=average, beta=1.0) + assert_almost_equal(fbeta, 0) @@ -1613,17 +1614,19 @@ def test_precision_recall_f1_no_labels_average_none_warn(): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=None, beta=1) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, - beta=1, average=None) + with pytest.warns(UndefinedMetricWarning): + p, r, f, s = precision_recall_fscore_support( + y_true, y_pred, average=None, beta=1 + ) assert_array_almost_equal(p, [0, 0, 0], 2) 
assert_array_almost_equal(r, [0, 0, 0], 2) assert_array_almost_equal(f, [0, 0, 0], 2) assert_array_almost_equal(s, [0, 0, 0], 2) + with pytest.warns(UndefinedMetricWarning): + fbeta = fbeta_score(y_true, y_pred, beta=1, average=None) + assert_array_almost_equal(fbeta, [0, 0, 0], 2)
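
A minimal usage sketch of the behaviour this series introduces (illustrative only, not part of the
patches; it assumes the series above is applied and follows the docstring semantics it adds). With
no positive predictions, precision is ill-defined and falls back to ``zero_division``; recall stays
well-defined, and the F-score only falls back to ``zero_division`` when both precision and recall
are ill-defined.

    >>> from sklearn.metrics import precision_score, recall_score, f1_score
    >>> y_true = [0, 1, 0, 1]
    >>> y_pred = [0, 0, 0, 0]    # no positive predictions: precision is ill-defined
    >>> precision_score(y_true, y_pred)                   # default "warn": 0.0 plus UndefinedMetricWarning
    0.0
    >>> precision_score(y_true, y_pred, zero_division=1)  # ill-defined value set to 1, no warning
    1.0
    >>> recall_score(y_true, y_pred, zero_division=1)     # tp / (tp + fn) = 0 / 2, well-defined
    0.0
    >>> f1_score(y_true, y_pred, zero_division=1)         # precision falls back to 1, recall is 0
    0.0

Passing ``zero_division=0`` keeps today's numbers but suppresses the warning, which is what
``test_prf_no_warnings_if_zero_division_set`` added in this series asserts.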