From fe5d1a6ed671bf8bf2e8a7e86a050c781b2d20c4 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Fri, 30 Jul 2021 23:01:39 +0200 Subject: [PATCH 01/16] Fixing the usage of sample_weights in CalibratedClassifierCV with ensemble=False + updating the corresponding test --- sklearn/calibration.py | 39 +++++++++++++++++++++---------- sklearn/tests/test_calibration.py | 28 ++++++++++++---------- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 12d643f6e21dc..800408a18b5ab 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -338,23 +338,37 @@ def fit(self, X, y, sample_weight=None): else: this_estimator = clone(base_estimator) _, method_name = _get_prediction_method(this_estimator) - pred_method = partial( - cross_val_predict, - estimator=this_estimator, - X=X, - y=y, - cv=cv, - method=method_name, - n_jobs=self.n_jobs, - ) - predictions = _compute_predictions( - pred_method, method_name, X, n_classes - ) if sample_weight is not None and supports_sw: + pred_method = partial( + cross_val_predict, + estimator=this_estimator, + X=X, + y=y, + cv=cv, + method=method_name, + n_jobs=self.n_jobs, + fit_params={"sample_weight": sample_weight}, + ) + predictions = _compute_predictions( + pred_method, method_name, X, n_classes + ) this_estimator.fit(X, y, sample_weight) else: + pred_method = partial( + cross_val_predict, + estimator=this_estimator, + X=X, + y=y, + cv=cv, + method=method_name, + n_jobs=self.n_jobs, + ) + predictions = _compute_predictions( + pred_method, method_name, X, n_classes + ) this_estimator.fit(X, y) + calibrated_classifier = _fit_calibrator( this_estimator, predictions, @@ -363,6 +377,7 @@ def fit(self, X, y, sample_weight=None): self.method, sample_weight, ) + self.calibrated_classifiers_.append(calibrated_classifier) first_clf = self.calibrated_classifiers_[0].base_estimator diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 4fe08c27fb19e..bc04ecf2a7da2 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -156,27 +156,31 @@ def test_calibration_cv_splitter(data, ensemble): @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) @pytest.mark.parametrize("ensemble", [True, False]) def test_sample_weight(data, method, ensemble): - n_samples = 100 - X, y = data + X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) - sample_weight = np.random.RandomState(seed=42).uniform(size=len(y)) - X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples] - X_test = X[n_samples:] + # Compute weigths to compensate the unbalance of the dataset + sample_weight = 9 * (y == 0) + 1 + + X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split( + X, y, sample_weight, random_state=42 + ) base_estimator = LinearSVC(random_state=42) calibrated_clf = CalibratedClassifierCV( base_estimator, method=method, ensemble=ensemble ) calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) - probs_with_sw = calibrated_clf.predict_proba(X_test) + pred = calibrated_clf.predict_proba(X_test)[:, 1] - # As the weights are used for the calibration, they should still yield - # different predictions - calibrated_clf.fit(X_train, y_train) - probs_without_sw = calibrated_clf.predict_proba(X_test) + # Compute the calibration error + hist_0 = np.histogram(pred[y_test == 0], bins=np.linspace(0, 1, 6), density=True) + hist_1 = np.histogram(pred[y_test == 1], bins=np.linspace(0, 1, 6), density=True) - diff = np.linalg.norm(probs_with_sw - probs_without_sw) - assert diff > 0.1 + diff = np.linalg.norm( + (hist_0[1][:-1] + hist_0[1][1:]) / 2 + - hist_1[0] / (hist_0[0] + hist_1[0] + 1e-10) + ) + assert diff < 0.3 @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) From e42eac211bc7f834c14d8b92ac30f7f84be77312 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Fri, 30 Jul 2021 23:16:38 +0200 Subject: [PATCH 02/16] replace custom error by brier score --- sklearn/tests/test_calibration.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index bc04ecf2a7da2..a250c28c77e4b 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -170,17 +170,9 @@ def test_sample_weight(data, method, ensemble): base_estimator, method=method, ensemble=ensemble ) calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) - pred = calibrated_clf.predict_proba(X_test)[:, 1] + predictions = calibrated_clf.predict_proba(X_test)[:, 1] - # Compute the calibration error - hist_0 = np.histogram(pred[y_test == 0], bins=np.linspace(0, 1, 6), density=True) - hist_1 = np.histogram(pred[y_test == 1], bins=np.linspace(0, 1, 6), density=True) - - diff = np.linalg.norm( - (hist_0[1][:-1] + hist_0[1][1:]) / 2 - - hist_1[0] / (hist_0[0] + hist_1[0] + 1e-10) - ) - assert diff < 0.3 + assert brier_score_loss(y_test, predictions, sample_weight=sw_test) < 0.2 @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) From 3b962cc0795697ff5c983a2e6d107fe2bd6c523a Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Fri, 30 Jul 2021 23:27:50 +0200 Subject: [PATCH 03/16] removing unnecessary newline --- sklearn/calibration.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 800408a18b5ab..faf0ffe31cae6 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -368,7 +368,6 @@ def fit(self, X, y, sample_weight=None): pred_method, method_name, X, n_classes ) this_estimator.fit(X, y) - calibrated_classifier = _fit_calibrator( this_estimator, predictions, @@ -377,7 +376,6 @@ def fit(self, X, y, sample_weight=None): self.method, sample_weight, ) - self.calibrated_classifiers_.append(calibrated_classifier) first_clf = self.calibrated_classifiers_[0].base_estimator From 23b32e57d6b32388c9ae7010c0731cbd89c4043e Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sat, 31 Jul 2021 14:18:31 +0200 Subject: [PATCH 04/16] Reintroducing the old test. Reduce diff size of the bug fix. Add changelog --- doc/whats_new/v1.0.rst | 4 +++ sklearn/calibration.py | 43 +++++++++++++------------------ sklearn/tests/test_calibration.py | 26 +++++++++++++++++++ 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 7f44c62eb7329..1b04e5ff7869d 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -146,6 +146,10 @@ Changelog as `base_estimator` in ::class:`calibration.CalibratedClassifierCV`. :pr:`20087` by :user:`Clément Fauchereau `. +- |Fix| Fixed ::class:`calibration.CalibratedClassifierCV` to handle correctly + `sample_weight` when `ensemble=False`. :pr:`20638` by + :user:`Julien Bohné ` + :mod:`sklearn.cluster` ...................... diff --git a/sklearn/calibration.py b/sklearn/calibration.py index faf0ffe31cae6..cdb4d49d9518a 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -338,35 +338,28 @@ def fit(self, X, y, sample_weight=None): else: this_estimator = clone(base_estimator) _, method_name = _get_prediction_method(this_estimator) + fit_params = ( + {"sample_weight": sample_weight} + if sample_weight is not None and supports_sw + else None + ) + pred_method = partial( + cross_val_predict, + estimator=this_estimator, + X=X, + y=y, + cv=cv, + method=method_name, + n_jobs=self.n_jobs, + fit_params=fit_params, + ) + predictions = _compute_predictions( + pred_method, method_name, X, n_classes + ) if sample_weight is not None and supports_sw: - pred_method = partial( - cross_val_predict, - estimator=this_estimator, - X=X, - y=y, - cv=cv, - method=method_name, - n_jobs=self.n_jobs, - fit_params={"sample_weight": sample_weight}, - ) - predictions = _compute_predictions( - pred_method, method_name, X, n_classes - ) this_estimator.fit(X, y, sample_weight) else: - pred_method = partial( - cross_val_predict, - estimator=this_estimator, - X=X, - y=y, - cv=cv, - method=method_name, - n_jobs=self.n_jobs, - ) - predictions = _compute_predictions( - pred_method, method_name, X, n_classes - ) this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( this_estimator, diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index a250c28c77e4b..ccbeb18570ce3 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -156,6 +156,32 @@ def test_calibration_cv_splitter(data, ensemble): @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) @pytest.mark.parametrize("ensemble", [True, False]) def test_sample_weight(data, method, ensemble): + n_samples = 100 + X, y = data + + sample_weight = np.random.RandomState(seed=42).uniform(size=len(y)) + X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples] + X_test = X[n_samples:] + + base_estimator = LinearSVC(random_state=42) + calibrated_clf = CalibratedClassifierCV( + base_estimator, method=method, ensemble=ensemble + ) + calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) + probs_with_sw = calibrated_clf.predict_proba(X_test) + + # As the weights are used for the calibration, they should still yield + # different predictions + calibrated_clf.fit(X_train, y_train) + probs_without_sw = calibrated_clf.predict_proba(X_test) + + diff = np.linalg.norm(probs_with_sw - probs_without_sw) + assert diff > 0.1 + + +@pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) +@pytest.mark.parametrize("ensemble", [True, False]) +def test_sample_weight_class_imbalanced(data, method, ensemble): X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) # Compute weigths to compensate the unbalance of the dataset From ea8acd3f1efab4f6947db75a7096cce96bef5796 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 8 Aug 2021 14:40:51 +0200 Subject: [PATCH 05/16] Update sklearn/tests/test_calibration.py add stratify=y when splitting train and test sets Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index ccbeb18570ce3..74f25f6edece9 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -188,7 +188,7 @@ def test_sample_weight_class_imbalanced(data, method, ensemble): sample_weight = 9 * (y == 0) + 1 X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split( - X, y, sample_weight, random_state=42 + X, y, sample_weight, stratify=y, random_state=42 ) base_estimator = LinearSVC(random_state=42) From 40fc9530400bfefd7353cec1bef653d57a7dfe09 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 8 Aug 2021 14:42:44 +0200 Subject: [PATCH 06/16] Update doc/whats_new/v1.0.rst Co-authored-by: Guillaume Lemaitre --- doc/whats_new/v1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 1b04e5ff7869d..6cdb84d4269ab 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -147,8 +147,8 @@ Changelog :pr:`20087` by :user:`Clément Fauchereau `. - |Fix| Fixed ::class:`calibration.CalibratedClassifierCV` to handle correctly - `sample_weight` when `ensemble=False`. :pr:`20638` by - :user:`Julien Bohné ` + `sample_weight` when `ensemble=False`. + :pr:`20638` by :user:`Julien Bohné `. :mod:`sklearn.cluster` ...................... From da6970a72268edd18021e6f42c01b1386e5161c1 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Wed, 22 Sep 2021 21:16:07 +0200 Subject: [PATCH 07/16] Added standardscaler before using SVC Added test to check that results are similar with ensemble is False or True --- sklearn/tests/test_calibration.py | 44 ++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 2593350d33b5c..5c22aad0fab69 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -166,6 +166,12 @@ def test_sample_weight(data, method, ensemble): X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples] X_test = X[n_samples:] + scaler = StandardScaler() + X_train = scaler.fit_transform( + X_train + ) # compute mean, std and transform training data as well + X_test = scaler.transform(X_test) + base_estimator = LinearSVC(random_state=42) calibrated_clf = CalibratedClassifierCV( base_estimator, method=method, ensemble=ensemble @@ -184,7 +190,7 @@ def test_sample_weight(data, method, ensemble): @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) @pytest.mark.parametrize("ensemble", [True, False]) -def test_sample_weight_class_imbalanced(data, method, ensemble): +def test_sample_weight_class_imbalanced(method, ensemble): X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) # Compute weigths to compensate the unbalance of the dataset @@ -194,6 +200,12 @@ def test_sample_weight_class_imbalanced(data, method, ensemble): X, y, sample_weight, stratify=y, random_state=42 ) + scaler = StandardScaler() + X_train = scaler.fit_transform( + X_train + ) # compute mean, std and transform training data as well + X_test = scaler.transform(X_test) + base_estimator = LinearSVC(random_state=42) calibrated_clf = CalibratedClassifierCV( base_estimator, method=method, ensemble=ensemble @@ -204,6 +216,36 @@ def test_sample_weight_class_imbalanced(data, method, ensemble): assert brier_score_loss(y_test, predictions, sample_weight=sw_test) < 0.2 +@pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) +def test_sample_weight_class_imbalanced_ensemble_equivalent(method): + X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) + + # Compute weigths to compensate the unbalance of the dataset + sample_weight = 9 * (y == 0) + 1 + + X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split( + X, y, sample_weight, stratify=y, random_state=42 + ) + + scaler = StandardScaler() + X_train = scaler.fit_transform( + X_train + ) # compute mean, std and transform training data as well + X_test = scaler.transform(X_test) + + predictions = [] + for ensemble in [True, False]: + base_estimator = LinearSVC(random_state=42) + calibrated_clf = CalibratedClassifierCV( + base_estimator, method=method, ensemble=ensemble + ) + calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) + predictions.append(calibrated_clf.predict_proba(X_test)[:, 1]) + + diff = np.linalg.norm(predictions[0] - predictions[1]) + assert diff < 1.5 + + @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) @pytest.mark.parametrize("ensemble", [True, False]) def test_parallel_execution(data, method, ensemble): From 4f93e3804e637a1346b52c3e4e7d08342af624ae Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Thu, 23 Sep 2021 20:30:10 +0200 Subject: [PATCH 08/16] edited whats_new/v1.0.rst --- doc/whats_new/v1.0.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index d8776653cd9e8..3b8989348692e 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -2,6 +2,23 @@ .. currentmodule:: sklearn +.. _changes_1_0_1: + +Version 1.0.1 +============== + +**In Development** + +Changelog +--------- + +:mod:`sklearn.calibration` +...................... + +- |Fix| Fixed :class:`calibration.CalibratedClassifierCV` to handle correctly + `sample_weight` when `ensemble=False`. + :pr:`20638` by :user:`Julien Bohné `. + .. _changes_1_0: Version 1.0.0 From 57d3aa6b5d41f104f76f0ef9419feba71d82037e Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Thu, 23 Sep 2021 22:36:30 +0200 Subject: [PATCH 09/16] correcting whats_new/v1.0.rst --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 3b8989348692e..c8a507b8f7177 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -5,7 +5,7 @@ .. _changes_1_0_1: Version 1.0.1 -============== +============= **In Development** From 1d0187bf69ac20b62cb772bdd9429cbc44814e84 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Thu, 23 Sep 2021 22:38:50 +0200 Subject: [PATCH 10/16] correcting whats_new/v1.0.rst --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index c8a507b8f7177..92983b0041b74 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -13,7 +13,7 @@ Changelog --------- :mod:`sklearn.calibration` -...................... +.......................... - |Fix| Fixed :class:`calibration.CalibratedClassifierCV` to handle correctly `sample_weight` when `ensemble=False`. From 81e8bd3bb17e0bcc14def45c664bf40356c4fd38 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 26 Sep 2021 09:56:28 +0200 Subject: [PATCH 11/16] Update sklearn/tests/test_calibration.py Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 5c22aad0fab69..a158cd90f64db 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -193,7 +193,7 @@ def test_sample_weight(data, method, ensemble): def test_sample_weight_class_imbalanced(method, ensemble): X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) - # Compute weigths to compensate the unbalance of the dataset + # Compute weights to compensate for the unbalance of the dataset sample_weight = 9 * (y == 0) + 1 X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split( From cf99fba9b6876ce412edc288383bb071bf84a9e2 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 26 Sep 2021 09:57:26 +0200 Subject: [PATCH 12/16] Update sklearn/tests/test_calibration.py Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_calibration.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index a158cd90f64db..a159769f7e48e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -194,7 +194,8 @@ def test_sample_weight_class_imbalanced(method, ensemble): X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) # Compute weights to compensate for the unbalance of the dataset - sample_weight = 9 * (y == 0) + 1 + weights = np.array([0.9, 0.1]) + sample_weight = weights[(y == 1).astype(int)] X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split( X, y, sample_weight, stratify=y, random_state=42 From e71995250da2a0b15ad78bc125b5af99bd009710 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 26 Sep 2021 09:57:43 +0200 Subject: [PATCH 13/16] Update sklearn/tests/test_calibration.py Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_calibration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index a159769f7e48e..86fc56a2dfe8b 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -191,6 +191,8 @@ def test_sample_weight(data, method, ensemble): @pytest.mark.parametrize("method", ["sigmoid", "isotonic"]) @pytest.mark.parametrize("ensemble", [True, False]) def test_sample_weight_class_imbalanced(method, ensemble): + """Use an imbalanced dataset to check that `sample_weight` is taken into + account in the calibration estimator.""" X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) # Compute weights to compensate for the unbalance of the dataset From 71de6f73a54411623fca3d1ca9a4d7decf9e09cc Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 26 Sep 2021 09:57:57 +0200 Subject: [PATCH 14/16] Update sklearn/tests/test_calibration.py Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_calibration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 86fc56a2dfe8b..0e7bccfdaf3e4 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -204,9 +204,7 @@ def test_sample_weight_class_imbalanced(method, ensemble): ) scaler = StandardScaler() - X_train = scaler.fit_transform( - X_train - ) # compute mean, std and transform training data as well + X_train = scaler.fit_transform(X_train) X_test = scaler.transform(X_test) base_estimator = LinearSVC(random_state=42) From 9b021c37fbbfdc15f5eab08cdfc5c94a245f34bf Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 26 Sep 2021 09:58:43 +0200 Subject: [PATCH 15/16] Update sklearn/tests/test_calibration.py Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_calibration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 0e7bccfdaf3e4..f1c9977e9c1d0 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -203,6 +203,9 @@ def test_sample_weight_class_imbalanced(method, ensemble): X, y, sample_weight, stratify=y, random_state=42 ) + # FIXME: ideally we should create a `Pipeline` with the `StandardScaler` + # followed by the `LinearSVC`. However, `Pipeline` does not expose + # `sample_weight` and it will be silently ignored. scaler = StandardScaler() X_train = scaler.fit_transform(X_train) X_test = scaler.transform(X_test) From 734870cd615de28533b38c6e18ec8f18e7cbba19 Mon Sep 17 00:00:00 2001 From: JulienB-78 Date: Sun, 26 Sep 2021 10:53:19 +0200 Subject: [PATCH 16/16] correcting indentation --- sklearn/tests/test_calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index f1c9977e9c1d0..da1645a1c0fd6 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -196,7 +196,7 @@ def test_sample_weight_class_imbalanced(method, ensemble): X, y = make_blobs((100, 1000), center_box=(-1, 1), random_state=42) # Compute weights to compensate for the unbalance of the dataset - weights = np.array([0.9, 0.1]) + weights = np.array([0.9, 0.1]) sample_weight = weights[(y == 1).astype(int)] X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(