From 84d3813c362f8582bcc2ca8cd4ab2323919c77ea Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Fri, 6 Sep 2019 04:20:59 +0100 Subject: [PATCH 01/10] first commit for issue 14876: zero_division parameter --- doc/whats_new/v0.21.rst | 7 + sklearn/metrics/classification.py | 130 +++++++++++--- sklearn/metrics/tests/test_classification.py | 180 +++++++++++++------ 3 files changed, 235 insertions(+), 82 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 28c1cc40542e2..e275cf3e6efc8 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -69,6 +69,13 @@ Support for Python 3.4 and below has been officially dropped. :mod:`sklearn.metrics` ...................... +- |Feature| Added a new parameter ``zero_division`` to multiple classification +metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, +:func:`fbeta_score`, :func:`precision_recall_fscore_support`, +:func:`classification_report`. This allows to set returned value for +ill-defined metrics. + :issue:`14876` by :user:`Marc Torrellas Socastro `. + - |Feature| Added the :func:`metrics.max_error` metric and a corresponding ``'max_error'`` scorer for single output regression. :issue:`12232` by :user:`Krishna Sangeeth `. diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index e8f7f85163259..1a006dfe01983 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -42,6 +42,17 @@ from ..exceptions import UndefinedMetricWarning +def _check_zero_division(zero_division): + if isinstance(zero_division, str): + if zero_division != "warn": + raise ValueError('zero_division must be one of ["warn", 0, 1]') + elif isinstance(zero_division, (int, float)): + if zero_division not in [0, 1]: + raise ValueError('zero_division must be one of ["warn", 0, 1]') + else: + raise TypeError('zero_division must be one of ["warn", 0, 1]') + + def _check_targets(y_true, y_pred): """Check that y_true and y_pred belong to the same classification task @@ -822,7 +833,7 @@ def zero_one_loss(y_true, y_pred, normalize=True, sample_weight=None): def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', - sample_weight=None): + sample_weight=None, zero_division="warn"): """Compute the F1 score, also known as balanced F-score or F-measure The F1 score can be interpreted as a weighted average of the precision and @@ -892,6 +903,12 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when both precision and recall are zero + (calculated using the same value for this parameter). + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- f1_score : float or array of float, shape = [n_unique_labels] @@ -921,15 +938,20 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.26... >>> f1_score(y_true, y_pred, average=None) array([0.8, 0. , 0. ]) + >>> y_true = [0, 0, 0, 0, 0, 0] + >>> y_pred = [0, 0, 0, 0, 0, 0] + >>> f1_score(y_true, y_pred, zero_division=1) + 1.0... 
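An illustrative aside (editor's sketch, not part of the original patch): with the same all-negative ``y_true``/``y_pred`` as above, the complementary setting pins the ill-defined score to zero instead.
    >>> f1_score(y_true, y_pred, zero_division=0)
    0.0...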
""" return fbeta_score(y_true, y_pred, 1, labels=labels, pos_label=pos_label, average=average, - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, - average='binary', sample_weight=None): + average='binary', sample_weight=None, zero_division="warn"): """Compute the F-beta score The F-beta score is the weighted harmonic mean of precision and recall, @@ -999,6 +1021,12 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when both precision and recall are zero + (calculated using the same value for this parameter). + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- fbeta_score : float (if average is not None) or array of float, shape =\ @@ -1043,15 +1071,17 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, pos_label=pos_label, average=average, warn_for=('f-score',), - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) return f -def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): +def _prf_divide(numerator, denominator, metric, + modifier, average, warn_for, zero_division): """Performs division and handles divide-by-zero. - On zero-division, sets the corresponding result elements to zero - and raises a warning. + On zero-division, sets the corresponding result elements equal to + ``zero_division`` and raises a warning. The metric, modifier and average arguments are used only for determining an appropriate warning. @@ -1062,7 +1092,12 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): return result # remove infs - result[mask] = 0.0 + result[mask] = float(zero_division == 1) + + # the user will be removing warnings if zero_division is set to something + # different than its default value + if zero_division != "warn": + return result # build appropriate warning # E.g. "Precision and F-score are ill-defined and being set to 0.0 in @@ -1095,7 +1130,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, pos_label=1, average=None, warn_for=('precision', 'recall', 'f-score'), - sample_weight=None): + sample_weight=None, + zero_division="warn"): """Compute precision, recall, F-measure and support for each class The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of @@ -1179,6 +1215,12 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 for precision, recall, and F-measure when + their computation implies a zero division. 
If + ``zero_division != "warn"``, warnings are suppressed + Returns ------- precision : float (if average is not None) or array of float, shape =\ @@ -1233,6 +1275,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, array([2, 2, 2])) """ + _check_zero_division(zero_division) average_options = (None, 'micro', 'macro', 'weighted', 'samples') if average not in average_options and average != 'binary': raise ValueError('average has to be one of ' + @@ -1249,7 +1292,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, if pos_label not in present_labels: if len(present_labels) < 2: # Only negative labels - return (0., 0., 0., 0) + zero_division = float(zero_division == 1) + return zero_division, zero_division, zero_division, None else: raise ValueError("pos_label=%r is not a valid label: %r" % (pos_label, present_labels)) @@ -1285,22 +1329,31 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # Oddly, we may get an "invalid" rather than a "divide" error # here. - precision = _prf_divide(tp_sum, pred_sum, - 'precision', 'predicted', average, warn_for) - recall = _prf_divide(tp_sum, true_sum, - 'recall', 'true', average, warn_for) + precision = _prf_divide(tp_sum, pred_sum, 'precision', + 'predicted', average, warn_for, zero_division) + recall = _prf_divide(tp_sum, true_sum, 'recall', + 'true', average, warn_for, zero_division) # Don't need to warn for F: either P or R warned, or tp == 0 where pos # and true are nonzero, in which case, F is well-defined and zero f_score = ((1 + beta2) * precision * recall / (beta2 * precision + recall)) f_score[tp_sum == 0] = 0.0 + f_score[(true_sum == 0) & (pred_sum == 0)] = float(zero_division == 1) # Average the results if average == 'weighted': weights = true_sum if weights.sum() == 0: - return 0, 0, 0, None + # precision is zero_division if there are no positive predictions + # recall is zero_division if there are no positive labels + # fscore is zero_division if all labels AND predictions are + # negative + return (float(zero_division == 1) if pred_sum.sum() == 0 else 0, + float(zero_division == 1), + float(zero_division == 1) if pred_sum.sum() == 0 else 0, + None) + elif average == 'samples': weights = sample_weight else: @@ -1317,7 +1370,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, def precision_score(y_true, y_pred, labels=None, pos_label=1, - average='binary', sample_weight=None): + average='binary', sample_weight=None, + zero_division="warn"): """Compute the precision The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of @@ -1383,6 +1437,11 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when there are no positive predictions. + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- precision : float (if average is not None) or array of float, shape =\ @@ -1409,19 +1468,24 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, 0.22... >>> precision_score(y_true, y_pred, average=None) # doctest: +ELLIPSIS array([0.66..., 0. , 0. ]) - + >>> y_pred = [0, 0, 0, 0, 0, 0] + >>> precision_score(y_true, y_pred) # doctest: +ELLIPSIS + 0.0... + >>> precision_score(y_true, y_pred, zero_division=1) # doctest: +ELLIPSIS + 1.0... 
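As an editing aside (this snippet is not part of the patch and only assumes the behaviour documented above): an explicit ``zero_division`` also silences ``UndefinedMetricWarning``, which can be checked by escalating that warning to an error::

    import warnings
    from sklearn.exceptions import UndefinedMetricWarning
    from sklearn.metrics import precision_score

    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 0, 0, 0, 0, 0]
    with warnings.catch_warnings():
        # escalate to prove the warning is not emitted when zero_division=0
        warnings.simplefilter("error", category=UndefinedMetricWarning)
        scores = precision_score(y_true, y_pred, average=None,
                                 zero_division=0)
    print(scores)  # approximately [0.33 0.   0.  ]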
""" p, _, _, _ = precision_recall_fscore_support(y_true, y_pred, labels=labels, pos_label=pos_label, average=average, warn_for=('precision',), - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) return p def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', - sample_weight=None): + sample_weight=None, zero_division="warn"): """Compute the recall The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of @@ -1486,6 +1550,11 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 when there are no positive labels. + If ``zero_division != "warn"``, warnings are suppressed + Returns ------- recall : float (if average is not None) or array of float, shape =\ @@ -1511,6 +1580,11 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.33... >>> recall_score(y_true, y_pred, average=None) array([1., 0., 0.]) + >>> y_true = [0, 0, 0, 0, 0, 0] + >>> recall_score(y_true, y_pred) + 0.0... + >>> recall_score(y_true, y_pred, zero_division=1) + 1.0... """ _, r, _, _ = precision_recall_fscore_support(y_true, y_pred, @@ -1518,7 +1592,8 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', pos_label=pos_label, average=average, warn_for=('recall',), - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) return r @@ -1600,7 +1675,8 @@ def balanced_accuracy_score(y_true, y_pred, sample_weight=None, def classification_report(y_true, y_pred, labels=None, target_names=None, - sample_weight=None, digits=2, output_dict=False): + sample_weight=None, digits=2, output_dict=False, + zero_division="warn"): """Build a text report showing the main classification metrics Read more in the :ref:`User Guide `. @@ -1630,6 +1706,12 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, output_dict : bool (default = False) If True, return output as dict + zero_division : string or int, default="warn" + Sets the behavior when there is a zero division. If set to + ("warn"|0)/1, returns 0/1 for precision, recall, and f1 when their + computation implies a zero division. If ``zero_division != "warn"``, + warnings are suppressed + Returns ------- report : string / dict @@ -1709,7 +1791,8 @@ class 2 1.00 0.67 0.80 3 p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, labels=labels, average=None, - sample_weight=sample_weight) + sample_weight=sample_weight, + zero_division=zero_division) rows = zip(target_names, p, r, f1, s) if y_type.startswith('multilabel'): @@ -1875,7 +1958,8 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or + (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 3152521f23b77..79d039b0802ff 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -528,8 +528,12 @@ def test_cohen_kappa(): y1 = np.array([0] * 46 + [1] * 44 + [2] * 10) y2 = np.array([0] * 50 + [1] * 40 + [2] * 10) assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4) - assert_almost_equal(cohen_kappa_score(y1, y2, weights="linear"), .9412, decimal=4) - assert_almost_equal(cohen_kappa_score(y1, y2, weights="quadratic"), .9541, decimal=4) + assert_almost_equal( + cohen_kappa_score(y1, y2, weights="linear"), .9412, decimal=4 + ) + assert_almost_equal(cohen_kappa_score( + y1, y2, weights="quadratic"), .9541, decimal=4 + ) @ignore_warnings @@ -1348,26 +1352,34 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): @pytest.mark.parametrize('beta', [1]) @pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) -def test_precision_recall_f1_no_labels(beta, average): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_no_labels(beta, average, zero_division): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=average, - beta=beta) - assert_almost_equal(p, 0) - assert_almost_equal(r, 0) - assert_almost_equal(f, 0) + func = precision_recall_fscore_support + my_assert = (assert_warns if zero_division == "warn" + else assert_no_warnings) + tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" + else [func]) + p, r, f, s = my_assert(*tmp, y_true, y_pred, average=average, + beta=beta, zero_division=zero_division) + tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" + else [fbeta_score]) + fbeta = my_assert(*tmp, y_true, y_pred, beta=beta, + average=average, zero_division=zero_division) + + zero_division = float(zero_division == 1) + assert_almost_equal(p, zero_division) + assert_almost_equal(r, zero_division) + assert_almost_equal(f, zero_division) assert_equal(s, None) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, - y_true, y_pred, - beta=beta, average=average) - assert_almost_equal(fbeta, 0) + assert_almost_equal(fbeta, float(zero_division == 1)) -def test_precision_recall_f1_no_labels_average_none(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_no_labels_average_none(zero_division): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) @@ -1381,100 +1393,143 @@ def test_precision_recall_f1_no_labels_average_none(): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=None, beta=beta) - assert_array_almost_equal(p, [0, 0, 0], 2) - assert_array_almost_equal(r, [0, 0, 0], 2) - assert_array_almost_equal(f, [0, 0, 0], 2) + func = precision_recall_fscore_support + my_assert = (assert_warns if zero_division == "warn" + else assert_no_warnings) + tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" + else [func]) + p, r, f, s = my_assert(*tmp, y_true, y_pred, average=None, + beta=beta, zero_division=zero_division) + tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" + else [fbeta_score]) + fbeta = my_assert(*tmp, y_true, y_pred, 
beta=beta, + average=None, zero_division=zero_division) + + zero_division = float(zero_division == 1) + assert_array_almost_equal( + p, [zero_division, zero_division, zero_division], 2 + ) + assert_array_almost_equal( + r, [zero_division, zero_division, zero_division], 2 + ) + assert_array_almost_equal( + f, [zero_division, zero_division, zero_division], 2 + ) assert_array_almost_equal(s, [0, 0, 0], 2) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, - y_true, y_pred, beta=beta, average=None) - assert_array_almost_equal(fbeta, [0, 0, 0], 2) + assert_array_almost_equal( + fbeta, [zero_division, zero_division, zero_division], 2 + ) -def test_prf_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_prf_warnings(zero_division): # average of per-label scores f, w = precision_recall_fscore_support, UndefinedMetricWarning - my_assert = assert_warns_message + my_assert = (assert_warns_message + if zero_division == "warn" else assert_no_warnings) for average in [None, 'weighted', 'macro']: + msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in labels with no predicted samples.') - my_assert(w, msg, f, [0, 1, 2], [1, 1, 2], average=average) + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [0, 1, 2], [1, 1, 2], average=average, + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.') - my_assert(w, msg, f, [1, 1, 2], [0, 1, 2], average=average) + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [1, 1, 2], [0, 1, 2], average=average, + zero_division=zero_division) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in samples with no predicted labels.') - my_assert(w, msg, f, np.array([[1, 0], [1, 0]]), - np.array([[1, 0], [0, 0]]), average='samples') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[1, 0], [1, 0]]), + np.array([[1, 0], [0, 0]]), average='samples', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.') - my_assert(w, msg, f, np.array([[1, 0], [0, 0]]), + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[1, 0], [0, 0]]), np.array([[1, 0], [1, 0]]), - average='samples') + average='samples', zero_division=zero_division) # single score: micro-average msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.') - my_assert(w, msg, f, np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), average='micro') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.') - my_assert(w, msg, f, np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), average='micro') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', + zero_division=zero_division) # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.') - my_assert(w, msg, f, [1, 1], [-1, -1], average='binary') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [1, 1], [-1, -1], average='binary', + zero_division=zero_division) msg = ('Recall 
and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.') - my_assert(w, msg, f, [-1, -1], [1, 1], average='binary') + tmp = [w, msg, f] if zero_division == "warn" else [f] + my_assert(*tmp, [-1, -1], [1, 1], average='binary', + zero_division=zero_division) -def test_recall_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_recall_warnings(zero_division): assert_no_warnings(recall_score, np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), - average='micro') + average='micro', zero_division=zero_division) clean_warning_registry() with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') recall_score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), - average='micro') - assert_equal(str(record.pop().message), - 'Recall is ill-defined and ' - 'being set to 0.0 due to no true samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'Recall is ill-defined and ' + 'being set to 0.0 due to no true samples.') + else: + assert_equal(len(record), 0) -def test_precision_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_warnings(zero_division): clean_warning_registry() with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') precision_score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), - average='micro') - assert_equal(str(record.pop().message), - 'Precision is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'Precision is ill-defined and ' + 'being set to 0.0 due to no predicted samples.') + else: + assert_equal(len(record), 0) assert_no_warnings(precision_score, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), - average='micro') + average='micro', zero_division=zero_division) -def test_fscore_warnings(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_fscore_warnings(zero_division): clean_warning_registry() with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') @@ -1482,16 +1537,23 @@ def test_fscore_warnings(): for score in [f1_score, partial(fbeta_score, beta=2)]: score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), - average='micro') - assert_equal(str(record.pop().message), - 'F-score is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'F-score is ill-defined and ' + 'being set to 0.0 due to no predicted samples.') + else: + assert_equal(len(record), 0) + score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), - average='micro') - assert_equal(str(record.pop().message), - 'F-score is ill-defined and ' - 'being set to 0.0 due to no true samples.') + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert_equal(str(record.pop().message), + 'F-score is ill-defined and ' + 'being set to 0.0 due to no true samples.') + else: + assert_equal(len(record), 0) def test_prf_average_binary_data_non_binary(): From 16dcda17f3d2b6c04399517611f64491f5b106cd Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 13:14:59 +0100 Subject: [PATCH 02/10] - merge with master - Changed whats_new to 0.22 - F-score only warns if both prec and rec are ill-defined - new private method to 
simplify _prf_divide --- doc/developers/advanced_installation.rst | 12 +- doc/whats_new/v0.21.rst | 7 - doc/whats_new/v0.22.rst | 23 +++- .../plot_out_of_core_classification.py | 21 +-- sklearn/cluster/bicluster.py | 8 +- sklearn/compose/_target.py | 14 +- sklearn/compose/tests/test_target.py | 38 ++++++ sklearn/linear_model/stochastic_gradient.py | 2 +- sklearn/metrics/classification.py | 128 +++++++++++------- sklearn/metrics/tests/test_classification.py | 114 +++++++++------- 10 files changed, 229 insertions(+), 138 deletions(-) diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index 4f4fb2b073580..0eaac27699d37 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -242,12 +242,20 @@ The above commands assume that you have the Python installation folder in your PATH environment variable. You will need `Build Tools for Visual Studio 2017 -`_. +`_. + +.. warning:: + You DO NOT need to install Visual Studio 2019. + You only need the "Build Tools for Visual Studio 2019", + under "All downloads" -> "Tools for Visual Studio 2019". For 64-bit Python, configure the build environment with:: SET DISTUTILS_USE_SDK=1 - "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + +Please be aware that the path above might be different from user to user. +The aim is to point to the "vcvarsall.bat" file. And build scikit-learn from this environment:: diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index fc4f173d9a1ec..5f5d0e3d04dc5 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -721,13 +721,6 @@ Support for Python 3.4 and below has been officially dropped. :mod:`sklearn.metrics` ...................... -- |Feature| Added a new parameter ``zero_division`` to multiple classification -metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, -:func:`fbeta_score`, :func:`precision_recall_fscore_support`, -:func:`classification_report`. This allows to set returned value for -ill-defined metrics. - :issue:`14876` by :user:`Marc Torrellas Socastro `. - - |Feature| Added the :func:`metrics.max_error` metric and a corresponding ``'max_error'`` scorer for single output regression. :pr:`12232` by :user:`Krishna Sangeeth `. diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 8987ee1cc359b..0bb06752d7c0f 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -75,6 +75,10 @@ Changelog 1.12. :pr:`14510` by :user:`Guillaume Lemaitre `. +- |Fix| Fixed a bug in :class:`compose.TransformedTargetRegrssor` which did not + pass `**fit_params` to the underlying regressor. + :pr:`14890` by :user:`Miguel Cabrera `. + :mod:`sklearn.datasets` ....................... @@ -219,7 +223,7 @@ Changelog -|FIX| Fixed a bug where :class:`kernel_approximation.Nystroem` raised a `KeyError` when using `kernel="precomputed"`. - :pr:`14706` by :user:`Venkatachalam N `. + :pr:`14706` by :user:`Venkatachalam N `. :mod:`sklearn.linear_model` ........................... @@ -262,6 +266,13 @@ Changelog :mod:`sklearn.metrics` ...................... +- |Feature| Added a new parameter ``zero_division`` to multiple classification + metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, + :func:`fbeta_score`, :func:`precision_recall_fscore_support`, + :func:`classification_report`. 
This allows to set returned value for + ill-defined metrics. + :pr:`14900` by :user:`Marc Torrellas Socastro `. + - |Feature| Added the :func:`metrics.nan_euclidean_distances` metric, which calculates euclidean distances in the presence of missing values. :issue:`12852` by :user:`Ashim Bhattarai ` and @@ -343,19 +354,19 @@ Changelog - |Enhancement| SVM now throws more specific error when fit on non-square data and kernel = precomputed. :class:`svm.BaseLibSVM` :pr:`14336` by :user:`Gregory Dexter `. - + :mod:`sklearn.tree` ................... - |Feature| Adds minimal cost complexity pruning, controlled by ``ccp_alpha``, to :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, :class:`tree.ExtraTreeRegressor`, - :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, - :class:`ensemble.ExtraTreesClassifier`, + :class:`ensemble.ExtraTreesClassifier`, :class:`ensemble.ExtraTreesRegressor`, - :class:`ensemble.RandomTreesEmbedding`, - :class:`ensemble.GradientBoostingClassifier`, + :class:`ensemble.RandomTreesEmbedding`, + :class:`ensemble.GradientBoostingClassifier`, and :class:`ensemble.GradientBoostingRegressor`. :pr:`12887` by `Thomas Fan`_. diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index a2f34b5994b15..0c89a31a64b0b 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -11,16 +11,6 @@ HashingVectorizer that will project each example into the same feature space. This is especially useful in the case of text classification where new features (words) may appear in each batch. - -The dataset used in this example is Reuters-21578 as provided by the UCI ML -repository. It will be automatically downloaded and uncompressed on first run. - -The plot represents the learning curve of the classifier: the evolution -of classification accuracy over the course of the mini-batches. Accuracy is -measured on the first 1000 samples, held out as a validation set. - -To limit the memory consumption, we queue examples up to a fixed amount before -feeding them to the learner. """ # Authors: Eustache Diemert @@ -57,6 +47,10 @@ def _not_in_sphinx(): # Reuters Dataset related routines # -------------------------------- # +# The dataset used in this example is Reuters-21578 as provided by the UCI ML +# repository. It will be automatically downloaded and uncompressed on first +# run. + class ReutersParser(HTMLParser): @@ -320,6 +314,13 @@ def progress(cls_name, stats): ############################################################################### # Plot results # ------------ +# +# The plot represents the learning curve of the classifier: the evolution +# of classification accuracy over the course of the mini-batches. Accuracy is +# measured on the first 1000 samples, held out as a validation set. +# +# To limit the memory consumption, we queue examples up to a fixed amount +# before feeding them to the learner. def plot_accuracy(x, y, x_legend): diff --git a/sklearn/cluster/bicluster.py b/sklearn/cluster/bicluster.py index d0e4aecd5d99c..5bfd335549012 100644 --- a/sklearn/cluster/bicluster.py +++ b/sklearn/cluster/bicluster.py @@ -1,9 +1,7 @@ -"""Spectral biclustering algorithms. 
+"""Spectral biclustering algorithms.""" +# Authors : Kemal Eren +# License: BSD 3 clause -Authors : Kemal Eren -License: BSD 3 clause - -""" from abc import ABCMeta, abstractmethod import numpy as np diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index ce0c76d6486c5..9b94e0275c368 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -148,7 +148,7 @@ def _fit_transformer(self, y): " you are sure you want to proceed regardless" ", set 'check_inverse=False'", UserWarning) - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, **fit_params): """Fit the model according to the given training data. Parameters @@ -160,9 +160,10 @@ def fit(self, X, y, sample_weight=None): y : array-like, shape (n_samples,) Target values. - sample_weight : array-like, shape (n_samples,) optional - Array of weights that are assigned to individual samples. - If not provided, then each sample is given unit weight. + **fit_params : dict of string -> object + Parameters passed to the ``fit`` method of the underlying + regressor. + Returns ------- @@ -197,10 +198,7 @@ def fit(self, X, y, sample_weight=None): else: self.regressor_ = clone(self.regressor) - if sample_weight is None: - self.regressor_.fit(X, y_trans) - else: - self.regressor_.fit(X, y_trans, sample_weight=sample_weight) + self.regressor_.fit(X, y_trans, **fit_params) return self diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py index cab28f406c5f9..77507b4026f2b 100644 --- a/sklearn/compose/tests/test_target.py +++ b/sklearn/compose/tests/test_target.py @@ -14,6 +14,8 @@ from sklearn.preprocessing import FunctionTransformer from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import Pipeline + from sklearn.linear_model import LinearRegression, Lasso from sklearn import datasets @@ -294,3 +296,39 @@ def test_transform_target_regressor_count_fit(check_inverse): ) ttr.fit(X, y) assert ttr.transformer_.fit_counter == 1 + + +class DummyRegressorWithExtraFitParams(DummyRegressor): + def fit(self, X, y, sample_weight=None, check_input=True): + # on the test below we force this to false, we make sure this is + # actually passed to the regressor + assert not check_input + return super().fit(X, y, sample_weight) + + +def test_transform_target_regressor_pass_fit_parameters(): + X, y = friedman + regr = TransformedTargetRegressor( + regressor=DummyRegressorWithExtraFitParams(), + transformer=DummyTransformer() + ) + + regr.fit(X, y, check_input=False) + assert regr.transformer_.fit_counter == 1 + + +def test_transform_target_regressor_route_pipeline(): + X, y = friedman + + regr = TransformedTargetRegressor( + regressor=DummyRegressorWithExtraFitParams(), + transformer=DummyTransformer() + ) + estimators = [ + ('normalize', StandardScaler()), ('est', regr) + ] + + pip = Pipeline(estimators) + pip.fit(X, y, **{'est__check_input': False}) + + assert regr.transformer_.fit_counter == 1 diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 684346a09d1a0..6a11c4a97ee2f 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -124,7 +124,7 @@ def _validate_params(self, for_partial_fit=False): if self.n_iter_no_change < 1: raise ValueError("n_iter_no_change must be >= 1") if not (0.0 < self.validation_fraction < 1.0): - raise ValueError("validation_fraction must be in ]0, 1[") + raise ValueError("validation_fraction must be in range (0, 1)") if 
self.learning_rate in ("constant", "invscaling", "adaptive"): if self.eta0 <= 0.0: raise ValueError("eta0 must be > 0") diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index cf1307d252819..166e8a029aa1d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1028,7 +1028,7 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when both precision and recall are zero (calculated using the same value for this parameter). @@ -1068,6 +1068,13 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', >>> f1_score(y_true, y_pred, zero_division=1) 1.0... + Notes + ----- + When ``true positive + false positive == 0``, precision is undefined; + When ``true positive + false negative == 0``, recall is undefined. + In such cases, by default the metric will be set to 0, as will f-score, + and ``UndefinedMetricWarning`` will be raised. This behavior can be + modified with ``zero_division``. """ return fbeta_score(y_true, y_pred, 1, labels=labels, pos_label=pos_label, average=average, @@ -1146,7 +1153,7 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when both precision and recall are zero (calculated using the same value for this parameter). @@ -1185,7 +1192,14 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, >>> fbeta_score(y_true, y_pred, average=None, beta=0.5) array([0.71..., 0. , 0. ]) + Notes + ----- + When ``true positive + false positive == 0`` or + ``true positive + false negative == 0``, f-score returns 0 and raises + ``UndefinedMetricWarning``. This behavior can be + modified with ``zero_division``. """ + _, _, f, _ = precision_recall_fscore_support(y_true, y_pred, beta=beta, labels=labels, @@ -1198,7 +1212,7 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, def _prf_divide(numerator, denominator, metric, - modifier, average, warn_for, zero_division): + modifier, average, warn_for, zero_division="warn"): """Performs division and handles divide-by-zero. On zero-division, sets the corresponding result elements equal to @@ -1208,31 +1222,27 @@ def _prf_divide(numerator, denominator, metric, The metric, modifier and average arguments are used only for determining an appropriate warning. """ - # TODO: check new - # mask = denominator == 0.0 - # denominator = denominator.copy() - # denominator[mask] = 1 # avoid infs/nans - # result = numerator / denominator - result = numerator / denominator mask = denominator == 0.0 + denominator = denominator.copy() + denominator[mask] = 1 # avoid infs/nans + result = numerator / denominator + if not np.any(mask): return result - # remove infs + # if ``zero_division=1``, set those with denominator == 0 equal to 1 result[mask] = float(zero_division == 1) # the user will be removing warnings if zero_division is set to something - # different than its default value - if zero_division != "warn": + # different than its default value. 
If we are computing only f-score + # the warning will be raised only if precision and recall are ill-defined + if zero_division != "warn" or metric not in warn_for: return result # build appropriate warning # E.g. "Precision and F-score are ill-defined and being set to 0.0 in - # labels with no predicted samples" - axis0 = 'sample' - axis1 = 'label' - if average == 'samples': - axis0, axis1 = axis1, axis0 + # labels with no predicted samples. Use ``zero_division`` parameter to + # control this behavior." if metric in warn_for and 'f-score' in warn_for: msg_start = '{0} and F-score are'.format(metric.title()) @@ -1243,14 +1253,25 @@ def _prf_divide(numerator, denominator, metric, else: return result + msg = _build_prf_warning_message(average, modifier, msg_start, len(result)) + + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) + return result + + +def _build_prf_warning_message(average, modifier, msg_start, result_size): + axis0 = 'sample' + axis1 = 'label' + if average == 'samples': + axis0, axis1 = axis1, axis0 msg = ('{0} ill-defined and being set to 0.0 {{0}} ' - 'no {1} {2}s.'.format(msg_start, modifier, axis0)) - if len(mask) == 1: + 'no {1} {2}s. Use ``zero_division`` parameter to control' + ' this behavior.'.format(msg_start, modifier, axis0)) + if result_size == 1: msg = msg.format('due to') else: msg = msg.format('in {0}s with'.format(axis1)) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) - return result + return msg def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label): @@ -1376,7 +1397,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 for precision, recall, and F-measure when their computation implies a zero division. If @@ -1431,13 +1452,14 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, (array([0. , 0. , 0.66...]), array([0., 0., 1.]), array([0. , 0. , 0.8]), array([2, 2, 2])) - TODO: add all these Notes and add Unless zero_division... + Notes ----- When ``true positive + false positive == 0``, precision is undefined; When ``true positive + false negative == 0``, recall is undefined. - In such cases, the metric will be set to 0, as will f-score, and - ``UndefinedMetricWarning`` will be raised. + In such cases, by default the metric will be set to 0, as will f-score, + and ``UndefinedMetricWarning`` will be raised. This behavior can be + modified with ``zero_division``. """ _check_zero_division(zero_division) if beta < 0: @@ -1462,31 +1484,31 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # Finally, we have all our sufficient statistics. Divide! 
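    # Editor's note (illustrative, not in the original patch): a tiny worked
    # case for the divisions below. With a single binary label and
    # tp_sum=[0], pred_sum=[0], true_sum=[2], precision is ill-defined (0/0)
    # and _prf_divide pins it to the ``zero_division`` value, while
    # recall = 0/2 = 0. The f-score denominator beta2 * precision + recall
    # can then be 0; it is replaced by 1 further down so the F-score comes
    # out as 0 rather than NaN.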
# beta2 = beta ** 2 - # Divide, and on zero-division, set scores to 0 and warn: - + # Divide, and on zero-division, set scores and/or warn according to + # zero_division: precision = _prf_divide(tp_sum, pred_sum, 'precision', 'predicted', average, warn_for, zero_division) recall = _prf_divide(tp_sum, true_sum, 'recall', 'true', average, warn_for, zero_division) + # warn for f-score only if zero_division is warn, it is in warn_for + # and BOTH prec and rec are ill-defined + if zero_division == "warn" and ("f-score",) == warn_for: + if (pred_sum[true_sum == 0] == 0).any(): + msg = _build_prf_warning_message( + average, "true nor predicted", 'F-score is', len(true_sum) + ) + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) - # TODO: check new version - - # if np.isposinf(beta): - # f_score = recall - # else: - # # Don't need to warn for F: either P or R warned, or tp == 0 where pos - # # and true are nonzero, in which case, F is well-defined and zero - # denom = beta2 * precision + recall - # denom[denom == 0.] = 1 # avoid division by 0 - # f_score = (1 + beta2) * precision * recall / denom + # if tp == 0 F will be 1 only if all predictions are zero, all labels are + # zero, and zero_division=1. In all other case, 0 + if np.isposinf(beta): + f_score = recall + else: + denom = beta2 * precision + recall - # Don't need to warn for F: either P or R warned, or tp == 0 where pos - # and true are nonzero, in which case, F is well-defined and zero - f_score = ((1 + beta2) * precision * recall / - (beta2 * precision + recall)) - f_score[tp_sum == 0] = 0.0 - f_score[(true_sum == 0) & (pred_sum == 0)] = float(zero_division == 1) + denom[denom == 0.] = 1 # avoid division by 0 + f_score = (1 + beta2) * precision * recall / denom # Average the results if average == 'weighted': @@ -1584,7 +1606,7 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when there are no positive predictions. If ``zero_division != "warn"``, warnings are suppressed @@ -1614,15 +1636,16 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, >>> precision_score(y_true, y_pred, average=None) array([0.66..., 0. , 0. ]) >>> y_pred = [0, 0, 0, 0, 0, 0] - >>> precision_score(y_true, y_pred) # doctest: +ELLIPSIS + >>> precision_score(y_true, y_pred) 0.0... - >>> precision_score(y_true, y_pred, zero_division=1) # doctest: +ELLIPSIS + >>> precision_score(y_true, y_pred, zero_division=1) 1.0... - TODO + Notes ----- When ``true positive + false positive == 0``, precision returns 0 and - raises ``UndefinedMetricWarning``. + raises ``UndefinedMetricWarning``. This behavior can be + modified with ``zero_division``. """ p, _, _, _ = precision_recall_fscore_support(y_true, y_pred, @@ -1701,7 +1724,7 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight : array-like of shape = [n_samples], optional Sample weights. - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 when there are no positive labels. If ``zero_division != "warn"``, warnings are suppressed @@ -1736,11 +1759,12 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.0... 
>>> recall_score(y_true, y_pred, zero_division=1) 1.0... - TODO + Notes ----- When ``true positive + false negative == 0``, recall returns 0 and raises - ``UndefinedMetricWarning``. + ``UndefinedMetricWarning``. This behavior can be modified with + ``zero_division``. """ _, r, _, _ = precision_recall_fscore_support(y_true, y_pred, labels=labels, @@ -1861,7 +1885,7 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, output_dict : bool (default = False) If True, return output as dict - zero_division : string or int, default="warn" + zero_division : "warn", 0 or 1, default="warn" Sets the behavior when there is a zero division. If set to ("warn"|0)/1, returns 0/1 for precision, recall, and f1 when their computation implies a zero division. If ``zero_division != "warn"``, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 06d08b4a25c82..e04cb7f91dc62 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -15,7 +15,6 @@ from sklearn.utils.validation import check_random_state from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal @@ -287,7 +286,7 @@ def test_precision_recall_f_ignored_labels(): # ensure the above were meaningful tests: for average in ['macro', 'weighted', 'micro']: assert (recall_13(average=average) != - recall_all(average=average)) + recall_all(average=average)) def test_average_precision_score_score_non_binary_class(): @@ -1533,7 +1532,7 @@ def test_precision_recall_f1_no_labels(beta, average, zero_division): assert_almost_equal(p, zero_division) assert_almost_equal(r, zero_division) assert_almost_equal(f, zero_division) - assert_equal(s, None) + assert s is None assert_almost_equal(fbeta, float(zero_division == 1)) @@ -1591,27 +1590,35 @@ def test_prf_warnings(zero_division): for average in [None, 'weighted', 'macro']: msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 in labels with no predicted samples.') + 'being set to 0.0 in labels with no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [0, 1, 2], [1, 1, 2], average=average, zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 in labels with no true samples.') + 'being set to 0.0 in labels with no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [1, 1, 2], [0, 1, 2], average=average, zero_division=zero_division) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 in samples with no predicted labels.') + 'being set to 0.0 in samples with no predicted labels.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[1, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), average='samples', zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 in samples with no true labels.') + 'being set to 0.0 in samples with no true labels.' 
+ ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[1, 0], [0, 0]]), np.array([[1, 0], [1, 0]]), @@ -1619,14 +1626,18 @@ def test_prf_warnings(zero_division): # single score: micro-average msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.') + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro', @@ -1634,26 +1645,38 @@ def test_prf_warnings(zero_division): # single positive label msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [1, 1], [-1, -1], average='binary', zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.') + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') tmp = [w, msg, f] if zero_division == "warn" else [f] my_assert(*tmp, [-1, -1], [1, 1], average='binary', zero_division=zero_division) - # TODO + with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - precision_recall_fscore_support([0, 0], [0, 0], average="binary") - msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.') - assert str(record.pop().message) == msg - msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.') - assert str(record.pop().message) == msg + precision_recall_fscore_support([0, 0], [0, 0], average="binary", + zero_division=zero_division) + if zero_division == "warn": + msg = ('Recall and F-score are ill-defined and ' + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + msg = ('Precision and F-score are ill-defined and ' + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + else: + assert len(record) == 0 @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) @@ -1668,9 +1691,11 @@ def test_recall_warnings(zero_division): np.array([[1, 1], [1, 1]]), average='micro', zero_division=zero_division) if zero_division == "warn": - assert_equal(str(record.pop().message), - 'Recall is ill-defined and ' - 'being set to 0.0 due to no true samples.') + assert (str(record.pop().message) == + 'Recall is ill-defined and ' + 'being set to 0.0 due to no true samples.' 
+ ' Use ``zero_division`` parameter to control' + ' this behavior.') else: assert len(record) == 0 @@ -1678,21 +1703,24 @@ def test_recall_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Recall is ill-defined and ' - 'being set to 0.0 due to no true samples.') + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) def test_precision_warnings(zero_division): with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - precision_score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) if zero_division == "warn": assert (str(record.pop().message) == 'Precision is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') else: assert len(record) == 0 @@ -1700,7 +1728,9 @@ def test_precision_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Precision is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no predicted samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') assert_no_warnings(precision_score, np.array([[0, 0], [0, 0]]), @@ -1717,36 +1747,26 @@ def test_fscore_warnings(zero_division): score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) - if zero_division == "warn": - assert (str(record.pop().message) == - 'F-score is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') - else: - assert len(record) == 0 + assert len(record) == 0 score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro', zero_division=zero_division) - if zero_division == "warn": - assert_equal(str(record.pop().message), - 'F-score is ill-defined and ' - 'being set to 0.0 due to no true samples.') - else: - assert len(record) == 0 + assert len(record) == 0 - score([0, 0], [0, 0]) + score(np.array([[0, 0], [0, 0]]), + np.array([[0, 0], [0, 0]]), + average='micro', zero_division=zero_division) if zero_division == "warn": assert (str(record.pop().message) == 'F-score is ill-defined and ' - 'being set to 0.0 due to no true samples.') - assert (str(record.pop().message) == - 'F-score is ill-defined and ' - 'being set to 0.0 due to no predicted samples.') + 'being set to 0.0 due to no true nor predicted ' + 'samples. 
Use ``zero_division`` parameter to ' + 'control this behavior.') else: assert len(record) == 0 - def test_prf_average_binary_data_non_binary(): # Error if user does not explicitly set non-binary average mode y_true_mc = [1, 2, 3, 3] @@ -1902,7 +1922,7 @@ def test_hinge_loss_multiclass(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_missing_labels_with_labels_none(): @@ -1940,7 +1960,7 @@ def test_hinge_loss_multiclass_with_missing_labels(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision, labels=labels) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_invariance_lists(): @@ -1967,7 +1987,7 @@ def test_hinge_loss_multiclass_invariance_lists(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + dummy_hinge_loss) def test_log_loss(): From 29d1109c7461a3791be5030718b5802cec54e4a4 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 13:31:54 +0100 Subject: [PATCH 03/10] fixed "[...0 0...]" --> "[...0, 0...]" in docstring --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 166e8a029aa1d..aceb7e7b208c9 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1063,7 +1063,7 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', 0.26... >>> f1_score(y_true, y_pred, average=None) array([0.8, 0. , 0. ]) - >>> y_true = [0, 0, 0 0, 0, 0] + >>> y_true = [0, 0, 0, 0, 0, 0] >>> y_pred = [0, 0, 0, 0, 0, 0] >>> f1_score(y_true, y_pred, zero_division=1) 1.0... From f4e85e69967d57a353c3a7ca35857ee08ba4ac37 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 13:47:09 +0100 Subject: [PATCH 04/10] corrected docstring examples --- sklearn/metrics/classification.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index aceb7e7b208c9..8e27f265e76a1 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1636,10 +1636,10 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, >>> precision_score(y_true, y_pred, average=None) array([0.66..., 0. , 0. ]) >>> y_pred = [0, 0, 0, 0, 0, 0] - >>> precision_score(y_true, y_pred) - 0.0... + >>> precision_score(y_true, y_pred, average=None) + array([0.33..., 0. , 0. ]) >>> precision_score(y_true, y_pred, zero_division=1) - 1.0... + array([0.33..., 1. , 1. ]) Notes ----- @@ -1755,10 +1755,10 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', >>> recall_score(y_true, y_pred, average=None) array([1., 0., 0.]) >>> y_true = [0, 0, 0, 0, 0, 0] - >>> recall_score(y_true, y_pred) - 0.0... - >>> recall_score(y_true, y_pred, zero_division=1) - 1.0... 
+ >>> recall_score(y_true, y_pred, average=None) + array([0.5, 0., 0.]) + >>> recall_score(y_true, y_pred, average=None, zero_division=1) + array([0.5, 1., 1.]) Notes ----- From 446f878fe4330facdb620996384fb7f45be57951 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Sat, 7 Sep 2019 14:18:05 +0100 Subject: [PATCH 05/10] corrected docstring examples (again) --- sklearn/metrics/classification.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 8e27f265e76a1..85b7a4de862a5 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1638,7 +1638,7 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, >>> y_pred = [0, 0, 0, 0, 0, 0] >>> precision_score(y_true, y_pred, average=None) array([0.33..., 0. , 0. ]) - >>> precision_score(y_true, y_pred, zero_division=1) + >>> precision_score(y_true, y_pred, average=None, zero_division=1) array([0.33..., 1. , 1. ]) Notes @@ -1756,9 +1756,9 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', array([1., 0., 0.]) >>> y_true = [0, 0, 0, 0, 0, 0] >>> recall_score(y_true, y_pred, average=None) - array([0.5, 0., 0.]) + array([0.5, 0. , 0. ]) >>> recall_score(y_true, y_pred, average=None, zero_division=1) - array([0.5, 1., 1.]) + array([0.5, 1. , 1. ]) Notes ----- From e189423d32f793bdfbe67c9483b45096f8f4d123 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Wed, 11 Sep 2019 19:55:31 +0100 Subject: [PATCH 06/10] - tests for warn and [0,1] separated to make them more clear; - better docstrings - more explicit use of zero_division value --- sklearn/metrics/classification.py | 60 +++--- sklearn/metrics/tests/test_classification.py | 214 ++++++++++++------- 2 files changed, 167 insertions(+), 107 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 85b7a4de862a5..9ced21519dd5d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -43,13 +43,13 @@ def _check_zero_division(zero_division): if isinstance(zero_division, str): - if zero_division != "warn": - raise ValueError('zero_division must be one of ["warn", 0, 1]') + if zero_division == "warn": + return elif isinstance(zero_division, (int, float)): - if zero_division not in [0, 1]: - raise ValueError('zero_division must be one of ["warn", 0, 1]') - else: - raise TypeError('zero_division must be one of ["warn", 0, 1]') + if zero_division in [0, 1]: + return + raise ValueError(f'Got zero_division={zero_division}.' + f' Must be one of ["warn", 0, 1]') def _check_targets(y_true, y_pred): @@ -1029,10 +1029,9 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 when both precision and recall are zero - (calculated using the same value for this parameter). - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division, i.e. when all + predictions and labels are negative. If set to "warn", this acts as 0, + but warnings are also raised. Returns ------- @@ -1154,10 +1153,9 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. 
If set to - ("warn"|0)/1, returns 0/1 when both precision and recall are zero - (calculated using the same value for this parameter). - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division, i.e. when all + predictions and labels are negative. If set to "warn", this acts as 0, + but warnings are also raised. Returns ------- @@ -1231,7 +1229,7 @@ def _prf_divide(numerator, denominator, metric, return result # if ``zero_division=1``, set those with denominator == 0 equal to 1 - result[mask] = float(zero_division == 1) + result[mask] = 0.0 if zero_division in ["warn", 0] else 1.0 # the user will be removing warnings if zero_division is set to something # different than its default value. If we are computing only f-score @@ -1398,10 +1396,11 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 for precision, recall, and F-measure when - their computation implies a zero division. If - ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division: + - recall: when there are no positive labels + - precision: when there are no positive predictions + - f-score: both + If set to "warn", this acts as 0, but warnings are also raised. Returns ------- @@ -1514,13 +1513,14 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, if average == 'weighted': weights = true_sum if weights.sum() == 0: + zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 # precision is zero_division if there are no positive predictions # recall is zero_division if there are no positive labels # fscore is zero_division if all labels AND predictions are # negative - return (float(zero_division == 1) if pred_sum.sum() == 0 else 0, - float(zero_division == 1), - float(zero_division == 1) if pred_sum.sum() == 0 else 0, + return (zero_division_value if pred_sum.sum() == 0 else 0, + zero_division_value, + zero_division_value if pred_sum.sum() == 0 else 0, None) elif average == 'samples': @@ -1607,9 +1607,8 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1, Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 when there are no positive predictions. - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. Returns ------- @@ -1725,9 +1724,8 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary', Sample weights. zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 when there are no positive labels. - If ``zero_division != "warn"``, warnings are suppressed + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. Returns ------- @@ -1886,10 +1884,8 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, If True, return output as dict zero_division : "warn", 0 or 1, default="warn" - Sets the behavior when there is a zero division. If set to - ("warn"|0)/1, returns 0/1 for precision, recall, and f1 when their - computation implies a zero division. 
If ``zero_division != "warn"``, - warnings are suppressed + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. Returns ------- diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e04cb7f91dc62..21cff91b29827 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1511,38 +1511,50 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): @pytest.mark.parametrize('beta', [1]) @pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +@pytest.mark.parametrize('zero_division', [0, 1]) def test_precision_recall_f1_no_labels(beta, average, zero_division): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) - func = precision_recall_fscore_support - my_assert = (assert_warns if zero_division == "warn" - else assert_no_warnings) - tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" - else [func]) - p, r, f, s = my_assert(*tmp, y_true, y_pred, average=average, - beta=beta, zero_division=zero_division) - tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" - else [fbeta_score]) - fbeta = my_assert(*tmp, y_true, y_pred, beta=beta, - average=average, zero_division=zero_division) - - zero_division = float(zero_division == 1) + p, r, f, s = assert_no_warnings(precision_recall_fscore_support, y_true, + y_pred, average=average, beta=beta, + zero_division=zero_division) + fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=beta, + average=average, zero_division=zero_division) + + zero_division = float(zero_division) assert_almost_equal(p, zero_division) assert_almost_equal(r, zero_division) assert_almost_equal(f, zero_division) assert s is None - assert_almost_equal(fbeta, float(zero_division == 1)) + assert_almost_equal(fbeta, float(zero_division)) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) -def test_precision_recall_f1_no_labels_average_none(zero_division): +@pytest.mark.parametrize('beta', [1]) +@pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) +def test_precision_recall_f1_no_labels_warn(beta, average): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) - beta = 1 + func = precision_recall_fscore_support + p, r, f, s = assert_warns(UndefinedMetricWarning, func, y_true, y_pred, + average=average, beta=beta) + fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, + average=average, beta=beta) + + assert_almost_equal(p, 0) + assert_almost_equal(r, 0) + assert_almost_equal(f, 0) + assert s is None + + assert_almost_equal(fbeta, 0) + + +@pytest.mark.parametrize('zero_division', [0, 1]) +def test_precision_recall_f1_no_labels_average_none(zero_division): + y_true = np.zeros((20, 3)) + y_pred = np.zeros_like(y_true) # tp = [0, 0, 0] # fn = [0, 0, 0] @@ -1552,19 +1564,14 @@ def test_precision_recall_f1_no_labels_average_none(zero_division): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - func = precision_recall_fscore_support - my_assert = (assert_warns if zero_division == "warn" - else assert_no_warnings) - tmp = ([UndefinedMetricWarning, func] if zero_division == "warn" - else [func]) - p, r, f, s = my_assert(*tmp, y_true, y_pred, average=None, - beta=beta, zero_division=zero_division) - tmp = ([UndefinedMetricWarning, fbeta_score] if zero_division == "warn" - else [fbeta_score]) - fbeta = my_assert(*tmp, y_true, y_pred, 
beta=beta, - average=None, zero_division=zero_division) - - zero_division = float(zero_division == 1) + p, r, f, s = assert_no_warnings(precision_recall_fscore_support, + y_true, y_pred, + average=None, beta=1, + zero_division=zero_division) + fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=1, + average=None, zero_division=zero_division) + + zero_division = float(zero_division) assert_array_almost_equal( p, [zero_division, zero_division, zero_division], 2 ) @@ -1581,102 +1588,159 @@ def test_precision_recall_f1_no_labels_average_none(zero_division): ) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_no_labels_average_none_warn(): + y_true = np.zeros((20, 3)) + y_pred = np.zeros_like(y_true) + + # tp = [0, 0, 0] + # fn = [0, 0, 0] + # fp = [0, 0, 0] + # support = [0, 0, 0] + # |y_hat_i inter y_i | = [0, 0, 0] + # |y_i| = [0, 0, 0] + # |y_hat_i| = [0, 0, 0] + + p, r, f, s = assert_warns(UndefinedMetricWarning, + precision_recall_fscore_support, + y_true, y_pred, average=None, beta=1) + fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, + beta=1, average=None) + + assert_array_almost_equal(p, [0, 0, 0], 2) + assert_array_almost_equal(r, [0, 0, 0], 2) + assert_array_almost_equal(f, [0, 0, 0], 2) + assert_array_almost_equal(s, [0, 0, 0], 2) + + assert_array_almost_equal(fbeta, [0, 0, 0], 2) + + +@pytest.mark.parametrize('zero_division', ["warn"]) def test_prf_warnings(zero_division): # average of per-label scores f, w = precision_recall_fscore_support, UndefinedMetricWarning - my_assert = (assert_warns_message - if zero_division == "warn" else assert_no_warnings) for average in [None, 'weighted', 'macro']: msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in labels with no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [0, 1, 2], [1, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average, + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [1, 1, 2], [0, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average, + zero_division=zero_division) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in samples with no predicted labels.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[1, 0], [1, 0]]), - np.array([[1, 0], [0, 0]]), average='samples', - zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[1, 0], [1, 0]]), + np.array([[1, 0], [0, 0]]), average='samples', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.' 
' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[1, 0], [0, 0]]), - np.array([[1, 0], [1, 0]]), - average='samples', zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[1, 0], [0, 0]]), + np.array([[1, 0], [1, 0]]), + average='samples', zero_division=zero_division) # single score: micro-average msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), average='micro', - zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), average='micro', - zero_division=zero_division) + assert_warns_message(w, msg, f, np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', + zero_division=zero_division) # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [1, 1], [-1, -1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary', + zero_division=zero_division) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - tmp = [w, msg, f] if zero_division == "warn" else [f] - my_assert(*tmp, [-1, -1], [1, 1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary', + zero_division=zero_division) with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') precision_recall_fscore_support([0, 0], [0, 0], average="binary", zero_division=zero_division) - if zero_division == "warn": - msg = ('Recall and F-score are ill-defined and ' - 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' - ' this behavior.') - assert str(record.pop().message) == msg - msg = ('Precision and F-score are ill-defined and ' - 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' - ' this behavior.') - assert str(record.pop().message) == msg - else: - assert len(record) == 0 + msg = ('Recall and F-score are ill-defined and ' + 'being set to 0.0 due to no true samples.' + ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + msg = ('Precision and F-score are ill-defined and ' + 'being set to 0.0 due to no predicted samples.' 
+ ' Use ``zero_division`` parameter to control' + ' this behavior.') + assert str(record.pop().message) == msg + + +@pytest.mark.parametrize('zero_division', [0, 1]) +def test_prf_no_warnings_if_zero_division_set(zero_division): + # average of per-label scores + f = precision_recall_fscore_support + for average in [None, 'weighted', 'macro']: + + assert_no_warnings(f, [0, 1, 2], [1, 1, 2], average=average, + zero_division=zero_division) + + assert_no_warnings(f, [1, 1, 2], [0, 1, 2], average=average, + zero_division=zero_division) + + # average of per-sample scores + assert_no_warnings(f, np.array([[1, 0], [1, 0]]), + np.array([[1, 0], [0, 0]]), average='samples', + zero_division=zero_division) + + assert_no_warnings(f, np.array([[1, 0], [0, 0]]), + np.array([[1, 0], [1, 0]]), + average='samples', zero_division=zero_division) + + # single score: micro-average + assert_no_warnings(f, np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', + zero_division=zero_division) + + assert_no_warnings(f, np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', + zero_division=zero_division) + + # single positive label + assert_no_warnings(f, [1, 1], [-1, -1], average='binary', + zero_division=zero_division) + + assert_no_warnings(f, [-1, -1], [1, 1], average='binary', + zero_division=zero_division) + + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter('always') + precision_recall_fscore_support([0, 0], [0, 0], average="binary", + zero_division=zero_division) + assert len(record) == 0 @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) From 3befe311e15d2da34e14ace8feaa7e1582498baf Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Tue, 24 Sep 2019 09:06:12 +0100 Subject: [PATCH 07/10] - removed fstring to make compatible with python<3.6 --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index b9bf84a9409b8..1120fbd593fe4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -48,8 +48,8 @@ def _check_zero_division(zero_division): elif isinstance(zero_division, (int, float)): if zero_division in [0, 1]: return - raise ValueError(f'Got zero_division={zero_division}.' - f' Must be one of ["warn", 0, 1]') + raise ValueError('Got zero_division={0}.' + ' Must be one of ["warn", 0, 1]'.format(zero_division)) def _check_targets(y_true, y_pred): From 7a2bc7db466ffec6f6f2338a17a995b10e7a221b Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Wed, 25 Sep 2019 09:17:14 +0100 Subject: [PATCH 08/10] - reverted changes to avoid flake8 warnings - added tests for YTN or YPN to check prec/rec with zero_division value - cleaner tests --- sklearn/metrics/classification.py | 3 +- sklearn/metrics/tests/test_classification.py | 96 ++++++++++---------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 1120fbd593fe4..5a5f75dffc37b 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2176,8 +2176,7 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. 
- y_pred : array-like of float, shape = (n_samples, n_classes) or - (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 42b5fa7867388..9a093003e9983 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -534,12 +534,10 @@ def test_cohen_kappa(): y1 = np.array([0] * 46 + [1] * 44 + [2] * 10) y2 = np.array([0] * 50 + [1] * 40 + [2] * 10) assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4) - assert_almost_equal( - cohen_kappa_score(y1, y2, weights="linear"), 0.9412, decimal=4 - ) - assert_almost_equal( - cohen_kappa_score(y1, y2, weights="quadratic"), 0.9541, decimal=4 - ) + assert_almost_equal(cohen_kappa_score(y1, y2, + weights="linear"), 0.9412, decimal=4) + assert_almost_equal(cohen_kappa_score(y1, y2, + weights="quadratic"), 0.9541, decimal=4) @ignore_warnings @@ -1452,28 +1450,33 @@ def test_precision_recall_f1_score_multilabel_2(): @ignore_warnings -def test_precision_recall_f1_score_with_an_empty_prediction(): +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_precision_recall_f1_score_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) # true_pos = [ 0. 1. 1. 0.] # false_pos = [ 0. 0. 0. 1.] # false_neg = [ 1. 1. 0. 0.] + zero_division = 1.0 if zero_division == 1.0 else 0.0 p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average=None) - assert_array_almost_equal(p, [0.0, 1.0, 1.0, 0.0], 2) - assert_array_almost_equal(r, [0.0, 0.5, 1.0, 0.0], 2) + average=None, + zero_division=zero_division) + assert_array_almost_equal(p, [zero_division, 1.0, 1.0, 0.0], 2) + assert_array_almost_equal(r, [0.0, 0.5, 1.0, zero_division], 2) assert_array_almost_equal(f, [0.0, 1 / 1.5, 1, 0.0], 2) assert_array_almost_equal(s, [1, 2, 1, 0], 2) - f2 = fbeta_score(y_true, y_pred, beta=2, average=None) + f2 = fbeta_score(y_true, y_pred, beta=2, average=None, + zero_division=zero_division) support = s assert_array_almost_equal(f2, [0, 0.55, 1, 0], 2) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average="macro") - assert_almost_equal(p, 0.5) - assert_almost_equal(r, 1.5 / 4) + average="macro", + zero_division=zero_division) + assert_almost_equal(p, (2 + zero_division) / 4) + assert_almost_equal(r, (1.5 + zero_division) / 4) assert_almost_equal(f, 2.5 / (4 * 1.5)) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, @@ -1481,24 +1484,29 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): np.mean(f2)) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average="micro") + average="micro", + zero_division=zero_division) assert_almost_equal(p, 2 / 3) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5)) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, - average="micro"), + average="micro", + zero_division=zero_division), (1 + 4) * p * r / (4 * p + r)) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, - average="weighted") - assert_almost_equal(p, 3 / 4) + average="weighted", + zero_division=zero_division) + assert_almost_equal(p, 3 / 4 if zero_division == 0 else 1.0) 
assert_almost_equal(r, 0.5) assert_almost_equal(f, (2 / 1.5 + 1) / 4) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, - average="weighted"), - np.average(f2, weights=support)) + average="weighted", + zero_division=zero_division), + np.average(f2, weights=support), + ) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average="samples") @@ -1510,7 +1518,8 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(f, 1 / 3) assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, - average="samples"), + average="samples", + zero_division=zero_division), 0.333, 2) @@ -1536,17 +1545,16 @@ def test_precision_recall_f1_no_labels(beta, average, zero_division): assert_almost_equal(fbeta, float(zero_division)) -@pytest.mark.parametrize('beta', [1]) @pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) -def test_precision_recall_f1_no_labels_warn(beta, average): +def test_precision_recall_f1_no_labels_check_warnings(average): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) func = precision_recall_fscore_support p, r, f, s = assert_warns(UndefinedMetricWarning, func, y_true, y_pred, - average=average, beta=beta) + average=average, beta=1.0) fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, - average=average, beta=beta) + average=average, beta=1.0) assert_almost_equal(p, 0) assert_almost_equal(r, 0) @@ -1571,9 +1579,9 @@ def test_precision_recall_f1_no_labels_average_none(zero_division): p, r, f, s = assert_no_warnings(precision_recall_fscore_support, y_true, y_pred, - average=None, beta=1, + average=None, beta=1.0, zero_division=zero_division) - fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=1, + fbeta = assert_no_warnings(fbeta_score, y_true, y_pred, beta=1.0, average=None, zero_division=zero_division) zero_division = float(zero_division) @@ -1619,8 +1627,7 @@ def test_precision_recall_f1_no_labels_average_none_warn(): assert_array_almost_equal(fbeta, [0, 0, 0], 2) -@pytest.mark.parametrize('zero_division', ["warn"]) -def test_prf_warnings(zero_division): +def test_prf_warnings(): # average of per-label scores f, w = precision_recall_fscore_support, UndefinedMetricWarning for average in [None, 'weighted', 'macro']: @@ -1629,15 +1636,13 @@ def test_prf_warnings(zero_division): 'being set to 0.0 in labels with no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average, - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' @@ -1645,16 +1650,14 @@ def test_prf_warnings(zero_division): ' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [1, 0]]), - np.array([[1, 0], [0, 0]]), average='samples', - zero_division=zero_division) + np.array([[1, 0], [0, 0]]), average='samples') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.' 
' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [0, 0]]), - np.array([[1, 0], [1, 0]]), - average='samples', zero_division=zero_division) + np.array([[1, 0], [1, 0]]), average='samples') # single score: micro-average msg = ('Precision and F-score are ill-defined and ' @@ -1662,36 +1665,31 @@ def test_prf_warnings(zero_division): ' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), average='micro', - zero_division=zero_division) + np.array([[0, 0], [0, 0]]), average='micro') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), average='micro', - zero_division=zero_division) + np.array([[1, 1], [1, 1]]), average='micro') # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' ' Use ``zero_division`` parameter to control' ' this behavior.') - assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary', - zero_division=zero_division) + assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary') with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - precision_recall_fscore_support([0, 0], [0, 0], average="binary", - zero_division=zero_division) + precision_recall_fscore_support([0, 0], [0, 0], average="binary") msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' 
' Use ``zero_division`` parameter to control' @@ -1993,7 +1991,7 @@ def test_hinge_loss_multiclass(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_missing_labels_with_labels_none(): @@ -2030,7 +2028,7 @@ def test_hinge_loss_multiclass_with_missing_labels(): np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) assert (hinge_loss(y_true, pred_decision, labels=labels) == - dummy_hinge_loss) + dummy_hinge_loss) def test_hinge_loss_multiclass_invariance_lists(): From 78911568a66a8cf065012c9790321c0ff3b5b8d0 Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Wed, 25 Sep 2019 22:12:28 +0100 Subject: [PATCH 09/10] small refactoring requested by @thomasjpfan --- sklearn/metrics/classification.py | 25 +++++++--------- sklearn/metrics/tests/test_classification.py | 30 ++++++++++---------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 5a5f75dffc37b..fdbfd52425a41 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -42,12 +42,10 @@ def _check_zero_division(zero_division): - if isinstance(zero_division, str): - if zero_division == "warn": - return - elif isinstance(zero_division, (int, float)): - if zero_division in [0, 1]: - return + if isinstance(zero_division, str) and zero_division == "warn": + return + elif isinstance(zero_division, (int, float)) and zero_division in [0, 1]: + return raise ValueError('Got zero_division={0}.' ' Must be one of ["warn", 0, 1]'.format(zero_division)) @@ -1251,25 +1249,23 @@ def _prf_divide(numerator, denominator, metric, else: return result - msg = _build_prf_warning_message(average, modifier, msg_start, len(result)) + _warn_prf(average, modifier, msg_start, len(result)) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return result -def _build_prf_warning_message(average, modifier, msg_start, result_size): - axis0 = 'sample' - axis1 = 'label' +def _warn_prf(average, modifier, msg_start, result_size): + axis0, axis1 = 'sample', 'label' if average == 'samples': axis0, axis1 = axis1, axis0 msg = ('{0} ill-defined and being set to 0.0 {{0}} ' - 'no {1} {2}s. Use ``zero_division`` parameter to control' + 'no {1} {2}s. Use `zero_division` parameter to control' ' this behavior.'.format(msg_start, modifier, axis0)) if result_size == 1: msg = msg.format('due to') else: msg = msg.format('in {0}s with'.format(axis1)) - return msg + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label): @@ -1494,10 +1490,9 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # and BOTH prec and rec are ill-defined if zero_division == "warn" and ("f-score",) == warn_for: if (pred_sum[true_sum == 0] == 0).any(): - msg = _build_prf_warning_message( + _warn_prf( average, "true nor predicted", 'F-score is', len(true_sum) ) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) # if tp == 0 F will be 1 only if all predictions are zero, all labels are # zero, and zero_division=1. 
In all other case, 0 diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 9a093003e9983..29eb44d149194 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1634,27 +1634,27 @@ def test_prf_warnings(): msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in labels with no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [0, 1, 2], [1, 1, 2], average=average) msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in labels with no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [1, 1, 2], [0, 1, 2], average=average) # average of per-sample scores msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 in samples with no predicted labels.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), average='samples') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 in samples with no true labels.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 0], [0, 0]]), np.array([[1, 0], [1, 0]]), average='samples') @@ -1662,14 +1662,14 @@ def test_prf_warnings(): # single score: micro-average msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro') @@ -1677,13 +1677,13 @@ def test_prf_warnings(): # single positive label msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [1, 1], [-1, -1], average='binary') msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_warns_message(w, msg, f, [-1, -1], [1, 1], average='binary') @@ -1692,12 +1692,12 @@ def test_prf_warnings(): precision_recall_fscore_support([0, 0], [0, 0], average="binary") msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert str(record.pop().message) == msg msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.' 
- ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert str(record.pop().message) == msg @@ -1761,7 +1761,7 @@ def test_recall_warnings(zero_division): assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') else: assert len(record) == 0 @@ -1771,7 +1771,7 @@ def test_recall_warnings(zero_division): assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') @@ -1786,7 +1786,7 @@ def test_precision_warnings(zero_division): assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') else: assert len(record) == 0 @@ -1796,7 +1796,7 @@ def test_precision_warnings(zero_division): assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.' - ' Use ``zero_division`` parameter to control' + ' Use `zero_division` parameter to control' ' this behavior.') assert_no_warnings(precision_score, @@ -1828,7 +1828,7 @@ def test_fscore_warnings(zero_division): assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no true nor predicted ' - 'samples. Use ``zero_division`` parameter to ' + 'samples. Use `zero_division` parameter to ' 'control this behavior.') else: assert len(record) == 0 From 3d3760e54ef0363aef44537d7ac092bc2c3dd1cd Mon Sep 17 00:00:00 2001 From: marctorrellas Date: Thu, 10 Oct 2019 23:58:18 +0100 Subject: [PATCH 10/10] assert_warns --> pytest.warns --- sklearn/metrics/tests/test_classification.py | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 29eb44d149194..f668b253b553b 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1551,16 +1551,17 @@ def test_precision_recall_f1_no_labels_check_warnings(average): y_pred = np.zeros_like(y_true) func = precision_recall_fscore_support - p, r, f, s = assert_warns(UndefinedMetricWarning, func, y_true, y_pred, - average=average, beta=1.0) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, - average=average, beta=1.0) + with pytest.warns(UndefinedMetricWarning): + p, r, f, s = func(y_true, y_pred, average=average, beta=1.0) assert_almost_equal(p, 0) assert_almost_equal(r, 0) assert_almost_equal(f, 0) assert s is None + with pytest.warns(UndefinedMetricWarning): + fbeta = fbeta_score(y_true, y_pred, average=average, beta=1.0) + assert_almost_equal(fbeta, 0) @@ -1613,17 +1614,19 @@ def test_precision_recall_f1_no_labels_average_none_warn(): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=None, beta=1) - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, - beta=1, average=None) + with pytest.warns(UndefinedMetricWarning): + p, r, f, s = precision_recall_fscore_support( + y_true, y_pred, average=None, beta=1 + ) assert_array_almost_equal(p, [0, 0, 0], 2) 
assert_array_almost_equal(r, [0, 0, 0], 2) assert_array_almost_equal(f, [0, 0, 0], 2) assert_array_almost_equal(s, [0, 0, 0], 2) + with pytest.warns(UndefinedMetricWarning): + fbeta = fbeta_score(y_true, y_pred, beta=1, average=None) + assert_array_almost_equal(fbeta, [0, 0, 0], 2)
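
A minimal usage sketch of the behaviour this series introduces (illustrative only, not part of the
patches; it assumes the series above is applied and follows the docstring semantics it adds). With
no positive predictions, precision is ill-defined and falls back to ``zero_division``; recall stays
well-defined, and the F-score only falls back to ``zero_division`` when both precision and recall
are ill-defined.

    >>> from sklearn.metrics import precision_score, recall_score, f1_score
    >>> y_true = [0, 1, 0, 1]
    >>> y_pred = [0, 0, 0, 0]    # no positive predictions: precision is ill-defined
    >>> precision_score(y_true, y_pred)                   # default "warn": 0.0 plus UndefinedMetricWarning
    0.0
    >>> precision_score(y_true, y_pred, zero_division=1)  # ill-defined value set to 1, no warning
    1.0
    >>> recall_score(y_true, y_pred, zero_division=1)     # tp / (tp + fn) = 0 / 2, well-defined
    0.0
    >>> f1_score(y_true, y_pred, zero_division=1)         # precision falls back to 1, recall is 0
    0.0

Passing ``zero_division=0`` keeps today's numbers but suppresses the warning, which is what
``test_prf_no_warnings_if_zero_division_set`` added in this series asserts.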