From 6ce5f185845c19107f5d9394253072c18af70cb6 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Fri, 2 Sep 2016 15:49:33 +1000 Subject: [PATCH 1/6] FIX use high precision cumsum and check it is stable enough --- sklearn/metrics/ranking.py | 5 +++-- sklearn/utils/extmath.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 03bee6e5064f1..8a1ee9996490e 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -27,6 +27,7 @@ from ..utils import check_consistent_length from ..utils import column_or_1d, check_array from ..utils.multiclass import type_of_target +from ..utils.extmath import stable_cumsum from ..utils.fixes import isclose from ..utils.fixes import bincount from ..utils.fixes import array_equal @@ -337,9 +338,9 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] # accumulate the true positives with decreasing threshold - tps = (y_true * weight).cumsum()[threshold_idxs] + tps = stable_cumsum(y_true * weight)[threshold_idxs] if sample_weight is not None: - fps = weight.cumsum()[threshold_idxs] - tps + fps = stable_cumsum(weight)[threshold_idxs] - tps else: fps = 1 + threshold_idxs - tps return fps, tps, y_score[threshold_idxs] diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index ab67c89e09525..64b84fb43493a 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -851,3 +851,14 @@ def _deterministic_vector_sign_flip(u): signs = np.sign(u[range(u.shape[0]), max_abs_rows]) u *= signs[:, np.newaxis] return u + + +def stable_cumsum(arr): + """Use high precision for cumsum and check that final value matches sum + """ + out = np.cumsum(arr, dtype=np.float64) + expected = np.sum(arr, dtype=np.float64) + if not np.allclose(out, expected): + raise RuntimeError('cumsum was found to be unstable: ' + 'its results do not correspond to sum') + return out From bd19e30f462d6a3538ce0034fa922dfd267cd795 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 3 Sep 2016 21:58:22 +1000 Subject: [PATCH 2/6] FIX corrected code --- sklearn/utils/extmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 64b84fb43493a..a1bade211ba0c 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -858,7 +858,7 @@ def stable_cumsum(arr): """ out = np.cumsum(arr, dtype=np.float64) expected = np.sum(arr, dtype=np.float64) - if not np.allclose(out, expected): + if not np.allclose(out[-1], expected): raise RuntimeError('cumsum was found to be unstable: ' 'its results do not correspond to sum') return out From f58fdf7d57286f8288a5db66724a8500a5ecf1aa Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 3 Sep 2016 21:59:17 +1000 Subject: [PATCH 3/6] Improved wording of error message --- sklearn/utils/extmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index a1bade211ba0c..ed2d2470b7737 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -860,5 +860,5 @@ def stable_cumsum(arr): expected = np.sum(arr, dtype=np.float64) if not np.allclose(out[-1], expected): raise RuntimeError('cumsum was found to be unstable: ' - 'its results do not correspond to sum') + 'its last element does not correspond to sum') return out From 9ced9da4b3e99b2a3c56ae2cee08e9fde4a7ff69 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 8 Sep 2016 13:54:42 +1000 Subject: [PATCH 4/6] TST Add test with reduced rtol, atol --- sklearn/utils/extmath.py | 13 +++++++++++-- sklearn/utils/tests/test_extmath.py | 11 +++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index ed2d2470b7737..b570d56905560 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -853,12 +853,21 @@ def _deterministic_vector_sign_flip(u): return u -def stable_cumsum(arr): +def stable_cumsum(arr, rtol=1e-05, atol=1e-08): """Use high precision for cumsum and check that final value matches sum + + Parameters + ---------- + arr : array-like + To be cumulatively summed as flat + rtol : float + Relative tolerance, see ``np.allclose`` + atol : float + Absolute tolerance, see ``np.allclose`` """ out = np.cumsum(arr, dtype=np.float64) expected = np.sum(arr, dtype=np.float64) - if not np.allclose(out[-1], expected): + if not np.allclose(out[-1], expected, rtol=rtol, atol=atol): raise RuntimeError('cumsum was found to be unstable: ' 'its last element does not correspond to sum') return out diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 5847d0566a9e5..23e8a110a614b 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -17,6 +17,7 @@ from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import skip_if_32bit from sklearn.utils.extmath import density @@ -32,6 +33,7 @@ from sklearn.utils.extmath import _incremental_mean_and_var from sklearn.utils.extmath import _deterministic_vector_sign_flip from sklearn.utils.extmath import softmax +from sklearn.utils.extmath import stable_cumsum from sklearn.datasets.samples_generator import make_low_rank_matrix @@ -643,3 +645,12 @@ def test_softmax(): exp_X = np.exp(X) sum_exp_X = np.sum(exp_X, axis=1).reshape((-1, 1)) assert_array_almost_equal(softmax(X), exp_X / sum_exp_X) + + +def test_stable_cumsum(): + assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3])) + r = np.random.RandomState(0).rand(100000) + assert_raise_message(RuntimeError, + 'cumsum was found to be unstable: its last element ' + 'does not correspond to sum', + stable_cumsum, r, rtol=0, atol=0) From 0cf09bc3398caffeaf6df9adba366c8fd948b720 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Fri, 9 Sep 2016 01:02:44 +1000 Subject: [PATCH 5/6] Limit test to numpy >=1.9 --- sklearn/utils/tests/test_extmath.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 23e8a110a614b..8d334f0c2322f 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -19,6 +19,8 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import skip_if_32bit +from sklearn.utils.testing import SkipTest +from sklearn.utils.fixes import np_version from sklearn.utils.extmath import density from sklearn.utils.extmath import logsumexp @@ -648,6 +650,8 @@ def test_softmax(): def test_stable_cumsum(): + if np_version < (1, 19): + raise SkipTest("Sum is as unstable as cumsum for numpy < 1.9") assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3])) r = np.random.RandomState(0).rand(100000) assert_raise_message(RuntimeError, From eb3500a8e5fb879134c246e46ccd464033c3ac2a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Fri, 9 Sep 2016 01:23:06 +1000 Subject: [PATCH 6/6] TST correct numpy version check --- sklearn/utils/tests/test_extmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 8d334f0c2322f..55f96cdf1574c 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -650,7 +650,7 @@ def test_softmax(): def test_stable_cumsum(): - if np_version < (1, 19): + if np_version < (1, 9): raise SkipTest("Sum is as unstable as cumsum for numpy < 1.9") assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3])) r = np.random.RandomState(0).rand(100000)