From 3b50e8d59ae8b1a700a6051720b6ec94bd49e8f1 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 25 Apr 2024 20:04:51 +0200 Subject: [PATCH 1/5] ENH use scipy isotonic_regression --- sklearn/isotonic.py | 23 ++++++++++++++++------- sklearn/tests/test_isotonic.py | 27 ++++++++++++++------------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index 04456b1763791..7067425f1783a 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -8,13 +8,14 @@ from numbers import Real import numpy as np -from scipy import interpolate +from scipy import interpolate, optimize from scipy.stats import spearmanr from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique from .base import BaseEstimator, RegressorMixin, TransformerMixin, _fit_context from .utils import check_array, check_consistent_length from .utils._param_validation import Interval, StrOptions, validate_params +from .utils.fixes import parse_version, sp_version from .utils.validation import _check_sample_weight, check_is_fitted __all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"] @@ -151,13 +152,21 @@ def isotonic_regression( array([2.75 , 2.75 , 2.75 , 2.75 , 7.33..., 7.33..., 7.33..., 7.33..., 7.33..., 7.33...]) """ - order = np.s_[:] if increasing else np.s_[::-1] y = check_array(y, ensure_2d=False, input_name="y", dtype=[np.float64, np.float32]) - y = np.array(y[order], dtype=y.dtype) - sample_weight = _check_sample_weight(sample_weight, y, dtype=y.dtype, copy=True) - sample_weight = np.ascontiguousarray(sample_weight[order]) + if sp_version >= parse_version("1.12.0"): + res = optimize.isotonic_regression( + y=y, weights=sample_weight, increasing=increasing + ) + y = np.asarray(res.x, dtype=y.dtype) + else: + # TODO: remove this branch when Scipy 1.12 is the minimum supported version + # Also remove _inplace_contiguous_isotonic_regression. + order = np.s_[:] if increasing else np.s_[::-1] + y = np.array(y[order], dtype=y.dtype) + sample_weight = _check_sample_weight(sample_weight, y, dtype=y.dtype, copy=True) + sample_weight = np.ascontiguousarray(sample_weight[order]) + _inplace_contiguous_isotonic_regression(y, sample_weight) - _inplace_contiguous_isotonic_regression(y, sample_weight) if y_min is not None or y_max is not None: # Older versions of np.clip don't accept None as a bound, so use np.inf if y_min is None: @@ -165,7 +174,7 @@ def isotonic_regression( if y_max is None: y_max = np.inf np.clip(y, y_min, y_max, y) - return y[order] + return y class IsotonicRegression(RegressorMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index 93df0221236b8..94acb30c57bf6 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -502,25 +502,26 @@ def test_isotonic_copy_before_fit(): copy.copy(ir) -def test_isotonic_dtype(): +@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) +def test_isotonic_dtype(dtype): y = [2, 1, 4, 3, 5] weights = np.array([0.9, 0.9, 0.9, 0.9, 0.9], dtype=np.float64) reg = IsotonicRegression() - for dtype in (np.int32, np.int64, np.float32, np.float64): - for sample_weight in (None, weights.astype(np.float32), weights): - y_np = np.array(y, dtype=dtype) - expected_dtype = check_array( - y_np, dtype=[np.float64, np.float32], ensure_2d=False - ).dtype + for sample_weight in (None, weights.astype(np.float32), weights): + y_np = np.array(y, dtype=dtype) + expected_dtype = check_array( + y_np, dtype=[np.float64, np.float32], ensure_2d=False + ).dtype - res = isotonic_regression(y_np, sample_weight=sample_weight) - assert res.dtype == expected_dtype + res = isotonic_regression(y_np, sample_weight=sample_weight) + print(f"{res.dtype=}") + assert res.dtype == expected_dtype - X = np.arange(len(y)).astype(dtype) - reg.fit(X, y_np, sample_weight=sample_weight) - res = reg.predict(X) - assert res.dtype == expected_dtype + X = np.arange(len(y)).astype(dtype) + reg.fit(X, y_np, sample_weight=sample_weight) + res = reg.predict(X) + assert res.dtype == expected_dtype @pytest.mark.parametrize("y_dtype", [np.int32, np.int64, np.float32, np.float64]) From 17e9077cc26eddc123695374e19cb095380d65bb Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 26 Apr 2024 09:14:17 +0200 Subject: [PATCH 2/5] MNT replace datetime by default_timer --- benchmarks/bench_isotonic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py index 556c452fa3323..be2ff6548cb92 100644 --- a/benchmarks/bench_isotonic.py +++ b/benchmarks/bench_isotonic.py @@ -13,7 +13,7 @@ import argparse import gc -from datetime import datetime +from timeit import default_timer import matplotlib.pyplot as plt import numpy as np @@ -52,9 +52,9 @@ def bench_isotonic_regression(Y): """ gc.collect() - tstart = datetime.now() + tstart = default_timer() isotonic_regression(Y) - return (datetime.now() - tstart).total_seconds() + return default_timer() - tstart if __name__ == "__main__": From 8bf213f4e6b112cb1a5b1cefec8fff128009e429 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 26 Apr 2024 09:17:01 +0200 Subject: [PATCH 3/5] CLN remove debug print --- sklearn/tests/test_isotonic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index 94acb30c57bf6..6085b65574007 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -515,7 +515,6 @@ def test_isotonic_dtype(dtype): ).dtype res = isotonic_regression(y_np, sample_weight=sample_weight) - print(f"{res.dtype=}") assert res.dtype == expected_dtype X = np.arange(len(y)).astype(dtype) From 4ca6e8bfdf142e7788e22edb86f92bf16cafba70 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 9 May 2024 12:00:11 +0200 Subject: [PATCH 4/5] CLN replace sp_version with sp_base_version --- sklearn/isotonic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index 7067425f1783a..88a442a162b88 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -15,7 +15,7 @@ from .base import BaseEstimator, RegressorMixin, TransformerMixin, _fit_context from .utils import check_array, check_consistent_length from .utils._param_validation import Interval, StrOptions, validate_params -from .utils.fixes import parse_version, sp_version +from .utils.fixes import parse_version, sp_base_version from .utils.validation import _check_sample_weight, check_is_fitted __all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"] @@ -153,7 +153,7 @@ def isotonic_regression( 7.33..., 7.33..., 7.33..., 7.33..., 7.33...]) """ y = check_array(y, ensure_2d=False, input_name="y", dtype=[np.float64, np.float32]) - if sp_version >= parse_version("1.12.0"): + if sp_base_version >= parse_version("1.12.0"): res = optimize.isotonic_regression( y=y, weights=sample_weight, increasing=increasing ) From 43cadcb4bdc2548f60037d6ce35d91afa91e5819 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 10 May 2024 13:15:48 +0200 Subject: [PATCH 5/5] FIX missing order in old path --- sklearn/isotonic.py | 1 + sklearn/tests/test_isotonic.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index 88a442a162b88..f1c7f48966946 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -166,6 +166,7 @@ def isotonic_regression( sample_weight = _check_sample_weight(sample_weight, y, dtype=y.dtype, copy=True) sample_weight = np.ascontiguousarray(sample_weight[order]) _inplace_contiguous_isotonic_regression(y, sample_weight) + y = y[order] if y_min is not None or y_max is not None: # Older versions of np.clip don't accept None as a bound, so use np.inf diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index 6085b65574007..90598b48f6434 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -227,7 +227,13 @@ def test_isotonic_regression_with_ties_in_differently_sized_groups(): def test_isotonic_regression_reversed(): y = np.array([10, 9, 10, 7, 6, 6.1, 5]) + y_result = np.array([10, 9.5, 9.5, 7, 6.05, 6.05, 5]) + + y_iso = isotonic_regression(y, increasing=False) + assert_allclose(y_iso, y_result) + y_ = IsotonicRegression(increasing=False).fit_transform(np.arange(len(y)), y) + assert_allclose(y_, y_result) assert_array_equal(np.ones(y_[:-1].shape), ((y_[:-1] - y_[1:]) >= 0))