diff --git a/.circleci/config.yml b/.circleci/config.yml index b5f679af6..8990d3f22 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -24,6 +24,7 @@ jobs: - NUMPYDOC_VERSION: 'latest' - SPHINXCONTRIB_BIBTEX_VERSION: 'latest' - PYDATA_SPHINX_THEME_VERSION: 'latest' + - SPHINX_DESIGN_VERSION: 'latest' steps: - add_ssh_keys: fingerprints: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5c4218dec..98f2b4e11 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -255,7 +255,7 @@ jobs: - template: build_tools/azure/posix.yml parameters: name: macOS - vmImage: macOS-11 + vmImage: macOS-12 dependsOn: [linting, git_commit] condition: | and( diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 32699e8a8..9601b44aa 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -114,6 +114,7 @@ mamba create -n $CONDA_ENV_NAME --yes --quiet \ "$(get_dep sphinxcontrib-bibtex $SPHINXCONTRIB_BIBTEX_VERSION)" \ "$(get_dep sphinx-copybutton $SPHINXCONTRIB_BIBTEX_VERSION)" \ "$(get_dep pydata-sphinx-theme $PYDATA_SPHINX_THEME_VERSION)" \ + "$(get_dep sphinx-design $SPHINX_DESIGN_VERSION)" \ memory_profiler packaging seaborn pytest coverage compilers tensorflow source activate $CONDA_ENV_NAME diff --git a/conftest.py b/conftest.py index 45a5ce679..0dc6e5a23 100644 --- a/conftest.py +++ b/conftest.py @@ -7,7 +7,14 @@ import os +import numpy as np import pytest +from sklearn.utils.fixes import parse_version + +# use legacy numpy print options to avoid failures due to NumPy 2.+ scalar +# representation +if parse_version(np.__version__) > parse_version("2.0.0"): + np.set_printoptions(legacy="1.25") def pytest_runtest_setup(item): diff --git a/doc/_static/css/imbalanced-learn.css b/doc/_static/css/imbalanced-learn.css index 6c778540b..3778ee94c 100644 --- a/doc/_static/css/imbalanced-learn.css +++ b/doc/_static/css/imbalanced-learn.css @@ -21,39 +21,44 @@ /* Override some aspects of the 
pydata-sphinx-theme */ -/* Getting started index page */ +/* Main index page overview cards */ .intro-card { - background: #fff; - border-radius: 0; - padding: 30px 10px 10px 10px; - margin: 10px 0px; -} - -.intro-card .card-text { - margin: 20px 0px; - /*min-height: 150px; */ -} - -.custom-button { - background-color: #dcdcdc; - border: none; - color: #484848; - text-align: center; - text-decoration: none; - display: inline-block; - font-size: 0.9rem; - border-radius: 0.5rem; + padding: 30px 10px 20px 10px; +} + +.intro-card .sd-card-img-top { + margin: 10px; + height: 52px; + background: none !important; +} + +.intro-card .sd-card-title { + color: var(--pst-color-primary); + font-size: var(--pst-font-size-h5); + padding: 1rem 0rem 0.5rem 0rem; +} + +.intro-card .sd-card-footer { + border: none !important; +} + +.intro-card .sd-card-footer p.sd-card-text { max-width: 220px; - padding: 0.5rem 0rem; + margin-left: auto; + margin-right: auto; +} + +.intro-card .sd-btn-secondary { + background-color: #6c757d !important; + border-color: #6c757d !important; } -.custom-button a { - color: #484848; +.intro-card .sd-btn-secondary:hover { + background-color: #5a6268 !important; + border-color: #545b62 !important; } -.custom-button p { - margin-top: 0; - margin-bottom: 0rem; - color: #484848; +.card, .card img { + background-color: var(--pst-color-background); } diff --git a/doc/_static/img/logo_wide_dark.png b/doc/_static/img/logo_wide_dark.png new file mode 100644 index 000000000..38f997886 Binary files /dev/null and b/doc/_static/img/logo_wide_dark.png differ diff --git a/doc/_static/index_api.svg b/doc/_static/index_api.svg new file mode 100644 index 000000000..69f7ba1d2 --- /dev/null +++ b/doc/_static/index_api.svg @@ -0,0 +1,97 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + diff --git a/doc/_static/index_examples.svg b/doc/_static/index_examples.svg new file mode 100644 index 000000000..de3d90237 --- /dev/null +++ 
b/doc/_static/index_examples.svg @@ -0,0 +1,76 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + diff --git a/doc/_static/index_getting_started.svg b/doc/_static/index_getting_started.svg new file mode 100644 index 000000000..2d36622cb --- /dev/null +++ b/doc/_static/index_getting_started.svg @@ -0,0 +1,66 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/doc/_static/index_user_guide.svg b/doc/_static/index_user_guide.svg new file mode 100644 index 000000000..bd1705351 --- /dev/null +++ b/doc/_static/index_user_guide.svg @@ -0,0 +1,67 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/doc/conf.py b/doc/conf.py index a6361eafd..5561808ab 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -43,6 +43,7 @@ "sphinx_issues", "sphinx_gallery.gen_gallery", "sphinx_copybutton", + "sphinx_design", ] # Specify how to identify the prompt when copying code snippets @@ -106,10 +107,12 @@ html_theme_options = { "external_links": [], "github_url": "https://github.com/scikit-learn-contrib/imbalanced-learn", - # "twitter_url": "https://twitter.com/pandas_dev", "use_edit_page_button": True, "show_toc_level": 1, # "navbar_align": "right", # For testing that the navbar items align properly + "logo": { + "image_dark": "https://imbalanced-learn.org/stable/_static/img/logo_wide_dark.png" + }, } html_context = { @@ -323,15 +326,7 @@ def generate_min_dependency_substitutions(app): # -- Additional temporary hacks ----------------------------------------------- -# Temporary work-around for spacing problem between parameter and parameter -# type in the doc, see https://github.com/numpy/numpydoc/issues/215. The bug -# has been fixed in sphinx (https://github.com/sphinx-doc/sphinx/pull/5976) but -# through a change in sphinx basic.css except rtd_theme does not use basic.css. 
-# In an ideal world, this would get fixed in this PR: -# https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files - def setup(app): app.connect("builder-inited", generate_min_dependency_table) app.connect("builder-inited", generate_min_dependency_substitutions) - app.add_css_file("basic.css") diff --git a/doc/index.rst b/doc/index.rst index aa3d7a9b2..238786314 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -21,80 +21,82 @@ Imbalanced-learn (imported as :mod:`imblearn`) is an open source, MIT-licensed library relying on scikit-learn (imported as :mod:`sklearn`) and provides tools when dealing with classification with imbalanced classes. -.. raw:: html - -
-
-
-
-
- -
Getting started
-

Check out the getting started guides to install imbalanced-learn. - Some extra information to get started with a new contribution is also provided.

- -.. container:: custom-button - - :ref:`To the installation guideline` - -.. raw:: html - -
-
-
-
-
-
- -
User guide
-

The user guide provides in-depth information on the - key concepts of imbalanced-learn with useful background information and explanation.

- -.. container:: custom-button - - :ref:`To the user guide` - -.. raw:: html - -
-
-
-
-
-
- -
API reference
-

The reference guide contains a detailed description of - the imbalanced-learn API. To known more about methods parameters.

- -.. container:: custom-button - - :ref:`To the reference guide` - -.. raw:: html - -
-
-
-
-
-
- -
Examples
-

The gallery of examples is a good place to see imbalanced-learn in action. - Select an example and dive in.

- -.. container:: custom-button - - :ref:`To the gallery of examples` - -.. raw:: html - -
-
-
-
-
+.. grid:: 1 2 2 2 + :gutter: 4 + :padding: 2 2 0 0 + :class-container: sd-text-center + + .. grid-item-card:: Getting started + :img-top: _static/index_getting_started.svg + :class-card: intro-card + :shadow: md + + Check out the getting started guides to install `imbalanced-learn`. + Some extra information to get started with a new contribution is also provided. + + +++ + + .. button-ref:: getting_started + :ref-type: ref + :click-parent: + :color: secondary + :expand: + + To the installation guideline + + .. grid-item-card:: User guide + :img-top: _static/index_user_guide.svg + :class-card: intro-card + :shadow: md + + The user guide provides in-depth information on the key concepts of + `imbalanced-learn` with useful background information and explanation. + + +++ + + .. button-ref:: user_guide + :ref-type: ref + :click-parent: + :color: secondary + :expand: + + To the user guide + + .. grid-item-card:: API reference + :img-top: _static/index_api.svg + :class-card: intro-card + :shadow: md + + The reference guide contains a detailed description of + the `imbalanced-learn` API. Consult it to learn more about method parameters. + + +++ + + .. button-ref:: api + :ref-type: ref + :click-parent: + :color: secondary + :expand: + + To the reference guide + + .. grid-item-card:: Examples + :img-top: _static/index_examples.svg + :class-card: intro-card + :shadow: md + + The gallery of examples is a good place to see `imbalanced-learn` in action. + Select an example and dive in. + + +++ + + .. button-ref:: general_examples + :ref-type: ref + :click-parent: + :color: secondary + :expand: + + To the gallery of examples .. 
toctree:: diff --git a/doc/under_sampling.rst b/doc/under_sampling.rst index 499b5a3d9..8f8e7fbb8 100644 --- a/doc/under_sampling.rst +++ b/doc/under_sampling.rst @@ -497,8 +497,7 @@ The class can be used as:: >>> from sklearn.linear_model import LogisticRegression >>> from imblearn.under_sampling import InstanceHardnessThreshold >>> iht = InstanceHardnessThreshold(random_state=0, - ... estimator=LogisticRegression( - ... solver='lbfgs', multi_class='auto')) + ... estimator=LogisticRegression()) >>> X_resampled, y_resampled = iht.fit_resample(X, y) >>> print(sorted(Counter(y_resampled).items())) [(0, 64), (1, 64), (2, 64)] diff --git a/doc/whats_new/v0.11.rst b/doc/whats_new/v0.11.rst index b36d3a902..8f421ee69 100644 --- a/doc/whats_new/v0.11.rst +++ b/doc/whats_new/v0.11.rst @@ -1,37 +1,5 @@ .. _changes_0_11: -Version 0.11.1 -============== - -Changelog ---------- - -Bug fixes -......... - -- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the entries of the - one-hot encoding should be divided by `sqrt(2)` and not `2`, taking into account that - they are plugged into an Euclidean distance computation. - :pr:`1014` by :user:`Guillaume Lemaitre `. - -- Raise an informative error message when all support vectors are tagged as noise in - :class:`~imblearn.over_sampling.SVMSMOTE`. - :pr:`1016` by :user:`Guillaume Lemaitre `. - -- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of standard - deviation of the continuous features was only computed on the minority class. Now, - we are computing this statistic for each class that is up-sampled. - :pr:`1015` by :user:`Guillaume Lemaitre `. - -- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` such that the case where - the median of standard deviation of the continuous features is null is handled - in the multiclass case as well. - :pr:`1015` by :user:`Guillaume Lemaitre `. 
- -- Fix a bug in :class:`~imblearn.over_sampling.BorderlineSMOTE` version 2 where samples - should be generated from the whole dataset and not only from the minority class. - :pr:`1023` by :user:`Guillaume Lemaitre `. - Version 0.11.0 ============== diff --git a/doc/whats_new/v0.12.rst b/doc/whats_new/v0.12.rst index df9df54a1..fb79497d8 100644 --- a/doc/whats_new/v0.12.rst +++ b/doc/whats_new/v0.12.rst @@ -1,6 +1,73 @@ .. _changes_0_12: -.. _changes_0_12: +Version 0.12.4 +============== + +**October 4, 2024** + +Changelog +--------- + +Compatibility +............. + +- Compatibility with NumPy 2.0+ + :pr:`1097` by :user:`Guillaume Lemaitre `. + +Version 0.12.3 +============== + +**May 28, 2024** + +Changelog +--------- + +Compatibility +............. + +- Compatibility with scikit-learn 1.5 + :pr:`1074` and :pr:`1084` by :user:`Guillaume Lemaitre `. + +Version 0.12.2 +============== + +**March 31, 2024** + +Changelog +--------- + +Bug fixes +......... + +- Fix the way we check for a specific Python version in the test suite. + :pr:`1075` by :user:`Guillaume Lemaitre `. + +Version 0.12.1 +============== + +**March 31, 2024** + +Changelog +--------- + +Bug fixes +......... + +- Fix a bug in :class:`~imblearn.under_sampling.InstanceHardnessThreshold` where + `estimator` could not be a :class:`~sklearn.pipeline.Pipeline` object. + :pr:`1049` by :user:`Gonenc Mogol `. + +Compatibility +............. + +- Do not use `distutils` in tests due to deprecation. + :pr:`1065` by :user:`Michael R. Crusoe `. + +- Fix the scikit-learn import in tests to be compatible with version 1.4.1.post1. + :pr:`1073` by :user:`Guillaume Lemaitre `. + +- Fix test to be compatible with Python 3.13. + :pr:`1073` by :user:`Guillaume Lemaitre `. Version 0.12.0 ============== @@ -13,6 +80,29 @@ Changelog Bug fixes ......... 
+- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the entries of the + one-hot encoding should be divided by `sqrt(2)` and not `2`, taking into account that + they are plugged into a Euclidean distance computation. + :pr:`1014` by :user:`Guillaume Lemaitre <glemaitre>`. + +- Raise an informative error message when all support vectors are tagged as noise in + :class:`~imblearn.over_sampling.SVMSMOTE`. + :pr:`1016` by :user:`Guillaume Lemaitre <glemaitre>`. + +- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of standard + deviation of the continuous features was only computed on the minority class. Now, + we are computing this statistic for each class that is up-sampled. + :pr:`1015` by :user:`Guillaume Lemaitre <glemaitre>`. + +- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` such that the case where + the median of standard deviation of the continuous features is null is handled + in the multiclass case as well. + :pr:`1015` by :user:`Guillaume Lemaitre <glemaitre>`. + +- Fix a bug in :class:`~imblearn.over_sampling.BorderlineSMOTE` version 2 where samples + should be generated from the whole dataset and not only from the minority class. + :pr:`1023` by :user:`Guillaume Lemaitre <glemaitre>`. + - Fix a bug in :class:`~imblearn.under_sampling.NeighbourhoodCleaningRule` where the `kind_sel="all"` was not working as explained in the literature. :pr:`1012` by :user:`Guillaume Lemaitre <glemaitre>`. diff --git a/examples/api/plot_sampling_strategy_usage.py b/examples/api/plot_sampling_strategy_usage.py index dbb52fcdf..1c76a06b2 100644 --- a/examples/api/plot_sampling_strategy_usage.py +++ b/examples/api/plot_sampling_strategy_usage.py @@ -129,7 +129,7 @@ # %% [markdown] # `sampling_strategy` as a `dict` -# ------------------------------ +# ------------------------------- # # When `sampling_strategy` is a `dict`, the keys correspond to the targeted # classes. 
The values correspond to the desired number of samples for each diff --git a/examples/applications/plot_outlier_rejections.py b/examples/applications/plot_outlier_rejections.py index 55f03e273..985b9211a 100644 --- a/examples/applications/plot_outlier_rejections.py +++ b/examples/applications/plot_outlier_rejections.py @@ -109,12 +109,12 @@ def outlier_rejection(X, y): pipe = make_pipeline( FunctionSampler(func=outlier_rejection), - LogisticRegression(solver="lbfgs", multi_class="auto", random_state=rng), + LogisticRegression(random_state=rng), ) y_pred = pipe.fit(X_train, y_train).predict(X_test) print(classification_report(y_test, y_pred)) -clf = LogisticRegression(solver="lbfgs", multi_class="auto", random_state=rng) +clf = LogisticRegression(random_state=rng) y_pred = clf.fit(X_train, y_train).predict(X_test) print(classification_report(y_test, y_pred)) diff --git a/examples/applications/porto_seguro_keras_under_sampling.py b/examples/applications/porto_seguro_keras_under_sampling.py index ee8a6e2f0..9175427fa 100644 --- a/examples/applications/porto_seguro_keras_under_sampling.py +++ b/examples/applications/porto_seguro_keras_under_sampling.py @@ -151,7 +151,7 @@ def wrapper(*args, **kwds): # mini-batches. 
import tensorflow from sklearn.metrics import roc_auc_score -from sklearn.utils import parse_version +from sklearn.utils.fixes import parse_version tf_version = parse_version(tensorflow.__version__) diff --git a/examples/ensemble/plot_comparison_ensemble_classifier.py b/examples/ensemble/plot_comparison_ensemble_classifier.py index 602e477e5..8c318e5bc 100644 --- a/examples/ensemble/plot_comparison_ensemble_classifier.py +++ b/examples/ensemble/plot_comparison_ensemble_classifier.py @@ -197,7 +197,7 @@ from imblearn.ensemble import EasyEnsembleClassifier, RUSBoostClassifier -estimator = AdaBoostClassifier(n_estimators=10) +estimator = AdaBoostClassifier(n_estimators=10, algorithm="SAMME") eec = EasyEnsembleClassifier(n_estimators=10, estimator=estimator) eec.fit(X_train, y_train) y_pred_eec = eec.predict(X_test) diff --git a/imblearn/_config.py b/imblearn/_config.py index 4c093db09..ef98e7305 100644 --- a/imblearn/_config.py +++ b/imblearn/_config.py @@ -7,7 +7,7 @@ from contextlib import contextmanager as contextmanager import sklearn -from sklearn.utils import parse_version +from sklearn.utils.fixes import parse_version sklearn_version = parse_version(sklearn.__version__) diff --git a/imblearn/_min_dependencies.py b/imblearn/_min_dependencies.py index 497688765..ec1f5dedb 100644 --- a/imblearn/_min_dependencies.py +++ b/imblearn/_min_dependencies.py @@ -37,6 +37,7 @@ "numpydoc": ("1.5.0", "docs"), "sphinxcontrib-bibtex": ("2.4.1", "docs"), "pydata-sphinx-theme": ("0.13.3", "docs"), + "sphinx-design": ("0.5.0", "docs"), } diff --git a/imblearn/_version.py b/imblearn/_version.py index c0fef945a..ff7e11ace 100644 --- a/imblearn/_version.py +++ b/imblearn/_version.py @@ -22,4 +22,4 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.12.0" +__version__ = "0.12.4" diff --git a/imblearn/base.py b/imblearn/base.py index e529fead6..0b2d94e84 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -17,7 +17,7 @@ from sklearn.base import 
_OneToOneFeatureMixin as OneToOneFeatureMixin from sklearn.preprocessing import label_binarize -from sklearn.utils import parse_version +from sklearn.utils.fixes import parse_version from sklearn.utils.multiclass import check_classification_targets from .utils import check_sampling_strategy, check_target_type diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index afcf3fd3a..acb0c70fa 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -16,7 +16,7 @@ from sklearn.ensemble._base import _partition_estimators from sklearn.exceptions import NotFittedError from sklearn.tree import DecisionTreeClassifier -from sklearn.utils import parse_version +from sklearn.utils.fixes import parse_version from sklearn.utils.validation import check_is_fitted try: @@ -386,7 +386,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None): self.sampler_ = clone(self.sampler) # RandomUnderSampler is not supporting sample_weight. We need to pass # None. 
- return super()._fit(X, y, self.max_samples, sample_weight=None) + return super()._fit(X, y, self.max_samples) # TODO: remove when minimum supported version of scikit-learn is 1.1 @available_if(_estimator_has("decision_function")) diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index db3c6cbb7..e3c85741c 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -15,8 +15,8 @@ from sklearn.ensemble._bagging import _parallel_decision_function from sklearn.ensemble._base import _partition_estimators from sklearn.exceptions import NotFittedError -from sklearn.utils import parse_version from sklearn.utils._tags import _safe_tags +from sklearn.utils.fixes import parse_version from sklearn.utils.validation import check_is_fitted try: @@ -300,7 +300,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None): check_target_type(y) # RandomUnderSampler is not supporting sample_weight. We need to pass # None. 
- return super()._fit(X, y, self.max_samples, sample_weight=None) + return super()._fit(X, y, self.max_samples) # TODO: remove when minimum supported version of scikit-learn is 1.1 @available_if(_estimator_has("decision_function")) @@ -365,9 +365,11 @@ def base_estimator_(self): raise error raise error - def _more_tags(self): + def _get_estimator(self): if self.estimator is None: - estimator = AdaBoostClassifier(algorithm="SAMME") - else: - estimator = self.estimator - return {"allow_nan": _safe_tags(estimator, "allow_nan")} + return AdaBoostClassifier(algorithm="SAMME") + return self.estimator + + # TODO: remove when minimum supported version of scikit-learn is 1.5 + def _more_tags(self): + return {"allow_nan": _safe_tags(self._get_estimator(), "allow_nan")} diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index a7c8f9beb..5f8d08e91 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -22,7 +22,8 @@ ) from sklearn.exceptions import DataConversionWarning from sklearn.tree import DecisionTreeClassifier -from sklearn.utils import _safe_indexing, check_random_state, parse_version +from sklearn.utils import _safe_indexing, check_random_state +from sklearn.utils.fixes import parse_version from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import _check_sample_weight diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index 539b7824f..9da02255e 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -8,7 +8,8 @@ from sklearn.ensemble import AdaBoostClassifier from sklearn.ensemble._base import _set_random_states from sklearn.tree import DecisionTreeClassifier -from sklearn.utils import _safe_indexing, parse_version +from sklearn.utils import _safe_indexing +from sklearn.utils.fixes import parse_version from sklearn.utils.validation import has_fit_parameter from ..base import _ParamsValidationMixin diff --git 
a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index 5705de553..382597183 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -174,7 +174,7 @@ def test_probability(): # Degenerate case, where some classes are missing ensemble = BalancedBaggingClassifier( - estimator=LogisticRegression(solver="lbfgs", multi_class="auto"), + estimator=LogisticRegression(solver="lbfgs"), random_state=0, max_samples=5, ) @@ -435,7 +435,7 @@ def test_estimators_samples(): # remap the y outside of the BalancedBaggingclassifier # _, y = np.unique(y, return_inverse=True) bagging = BalancedBaggingClassifier( - LogisticRegression(solver="lbfgs", multi_class="auto"), + LogisticRegression(), max_samples=0.5, max_features=0.5, random_state=1, diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py index f49ecd0aa..a073d846d 100644 --- a/imblearn/keras/tests/test_generator.py +++ b/imblearn/keras/tests/test_generator.py @@ -70,7 +70,7 @@ def test_balanced_batch_generator_class(data, sampler, sample_weight): batch_size=10, random_state=42, ) - model.fit_generator(generator=training_generator, epochs=10) + model.fit(training_generator, epochs=10) @pytest.mark.parametrize("keep_sparse", [True, False]) @@ -122,8 +122,8 @@ def test_balanced_batch_generator_function(data, sampler, sample_weight): batch_size=10, random_state=42, ) - model.fit_generator( - generator=training_generator, + model.fit( + training_generator, steps_per_epoch=steps_per_epoch, epochs=10, ) diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py index 11f654f02..40f099258 100644 --- a/imblearn/metrics/pairwise.py +++ b/imblearn/metrics/pairwise.py @@ -161,7 +161,7 @@ def fit(self, X, y): f"elements in n_categories and {self.n_features_in_} in " f"X." 
) - self.n_categories_ = np.array(self.n_categories, copy=False) + self.n_categories_ = np.asarray(self.n_categories) classes = unique_labels(y) # list of length n_features of ndarray (n_categories, n_classes) diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py index 93b7e8a7b..8ef902920 100644 --- a/imblearn/over_sampling/_smote/base.py +++ b/imblearn/over_sampling/_smote/base.py @@ -11,16 +11,17 @@ import warnings import numpy as np +import sklearn from scipy import sparse from sklearn.base import clone from sklearn.exceptions import DataConversionWarning from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder from sklearn.utils import ( - _get_column_indices, _safe_indexing, check_array, check_random_state, ) +from sklearn.utils.fixes import parse_version from sklearn.utils.sparsefuncs_fast import ( csr_mean_variance_axis0, ) @@ -34,6 +35,12 @@ from ...utils.fixes import _is_pandas_df, _mode from ..base import BaseOverSampler +sklearn_version = parse_version(sklearn.__version__).base_version +if parse_version(sklearn_version) < parse_version("1.5"): + from sklearn.utils import _get_column_indices +else: + from sklearn.utils._indexing import _get_column_indices + class BaseSMOTE(BaseOverSampler): """Base class for the different SMOTE algorithms.""" diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index 01eead7ea..7453446ad 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -12,9 +12,11 @@ # Christos Aridas # Guillaume Lemaitre # License: BSD +import sklearn from sklearn import pipeline from sklearn.base import clone -from sklearn.utils import Bunch, _print_elapsed_time +from sklearn.utils import Bunch +from sklearn.utils.fixes import parse_version from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import check_memory @@ -34,6 +36,12 @@ __all__ = ["Pipeline", "make_pipeline"] +sklearn_version = parse_version(sklearn.__version__).base_version +if 
parse_version(sklearn_version) < parse_version("1.5"): + from sklearn.utils import _print_elapsed_time +else: + from sklearn.utils._user_interface import _print_elapsed_time + class Pipeline(_ParamsValidationMixin, pipeline.Pipeline): """Pipeline of transforms and resamples with a final estimator. @@ -163,11 +171,12 @@ def _validate_steps(self): for t in transformers: if t is None or t == "passthrough": continue - if not ( - hasattr(t, "fit") - or hasattr(t, "fit_transform") - or hasattr(t, "fit_resample") - ) or not (hasattr(t, "transform") or hasattr(t, "fit_resample")): + + is_transfomer = hasattr(t, "fit") and hasattr(t, "transform") + is_sampler = hasattr(t, "fit_resample") + is_not_transfomer_or_sampler = not (is_transfomer or is_sampler) + + if is_not_transfomer_or_sampler: raise TypeError( "All intermediate steps of the chain should " "be estimators that implement fit and transform or " @@ -175,9 +184,7 @@ def _validate_steps(self): "'%s' (type %s) doesn't)" % (t, type(t)) ) - if hasattr(t, "fit_resample") and ( - hasattr(t, "fit_transform") or hasattr(t, "transform") - ): + if is_transfomer and is_sampler: raise TypeError( "All intermediate steps of the chain should " "be estimators that implement fit and transform or " diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index bcc10b8f1..e0c7a9103 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -1,9 +1,8 @@ -from distutils.version import LooseVersion - import numpy as np import pytest from scipy import sparse from sklearn.datasets import load_iris +from sklearn.utils.fixes import parse_version from imblearn.datasets import make_imbalance from imblearn.over_sampling import RandomOverSampler @@ -147,7 +146,7 @@ def accuracy(y_true, y_pred): @pytest.mark.parametrize("sampler", [None, NearMiss(), RandomOverSampler()]) def test_balanced_batch_generator(data, sampler): - if 
LooseVersion(tf.__version__) < "2": + if parse_version(tf.__version__) < parse_version("2.0.0"): check_balanced_batch_generator_tf_1_X_X(data, sampler) else: check_balanced_batch_generator_tf_2_X_X_compat_1_X_X(data, sampler) diff --git a/imblearn/tests/test_docstring_parameters.py b/imblearn/tests/test_docstring_parameters.py index b595d77d7..1bd6ecf51 100644 --- a/imblearn/tests/test_docstring_parameters.py +++ b/imblearn/tests/test_docstring_parameters.py @@ -11,7 +11,6 @@ import pytest from sklearn.datasets import make_classification from sklearn.linear_model import LogisticRegression -from sklearn.utils import IS_PYPY from sklearn.utils._testing import ( _get_func_name, check_docstring_parameters, @@ -70,7 +69,6 @@ # Python 3.7 @pytest.mark.filterwarnings("ignore::FutureWarning") @pytest.mark.filterwarnings("ignore::DeprecationWarning") -@pytest.mark.skipif(IS_PYPY, reason="test segfaults on PyPy") def test_docstring_parameters(): # Test module docstring formatting @@ -154,9 +152,6 @@ def test_tabs(): for importer, modname, ispkg in walk_packages( imblearn.__path__, prefix="imblearn." 
): - if IS_PYPY: - continue - # because we don't import mod = importlib.import_module(modname) diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index c39758d9f..d89e03a11 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -272,7 +272,7 @@ def test_pipeline_methods_anova(): X = iris.data y = iris.target # Test with Anova + LogisticRegression - clf = LogisticRegression(solver="lbfgs", multi_class="auto") + clf = LogisticRegression() filter1 = SelectKBest(f_classif, k=2) pipe = Pipeline([("anova", filter1), ("logistic", clf)]) pipe.fit(X, y) @@ -410,7 +410,7 @@ def test_fit_predict_on_pipeline_without_fit_predict(): scaler = StandardScaler() pca = PCA(svd_solver="full") pipe = Pipeline([("scaler", scaler), ("pca", pca)]) - error_regex = "'PCA' object has no attribute 'fit_predict'" + error_regex = "has no attribute 'fit_predict'" with raises(AttributeError, match=error_regex): getattr(pipe, "fit_predict") @@ -639,7 +639,7 @@ def test_classes_property(): clf = make_pipeline( SelectKBest(k=1), - LogisticRegression(solver="lbfgs", multi_class="auto", random_state=0), + LogisticRegression(), ) with raises(AttributeError): getattr(clf, "classes_") @@ -1219,7 +1219,7 @@ def test_score_samples_on_pipeline_without_score_samples(): pipe.fit(X, y) with pytest.raises( AttributeError, - match="'LogisticRegression' object has no attribute 'score_samples'", + match="has no attribute 'score_samples'", ): pipe.score_samples(X) diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py index 52d9280b6..dac3f3c33 100644 --- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py @@ -10,7 +10,7 @@ from collections import Counter import numpy as np -from sklearn.base import ClassifierMixin, clone +from 
sklearn.base import clone, is_classifier from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble._base import _set_random_states from sklearn.model_selection import StratifiedKFold, cross_val_predict @@ -140,7 +140,7 @@ def _validate_estimator(self, random_state): if ( self.estimator is not None - and isinstance(self.estimator, ClassifierMixin) + and is_classifier(self.estimator) and hasattr(self.estimator, "predict_proba") ): self.estimator_ = clone(self.estimator) diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py index 5d7008747..a63bb45a0 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py @@ -6,6 +6,7 @@ import numpy as np from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier from sklearn.naive_bayes import GaussianNB as NB +from sklearn.pipeline import make_pipeline from sklearn.utils._testing import assert_array_equal from imblearn.under_sampling import InstanceHardnessThreshold @@ -93,3 +94,19 @@ def test_iht_fit_resample_default_estimator(): assert isinstance(iht.estimator_, RandomForestClassifier) assert X_resampled.shape == (12, 2) assert y_resampled.shape == (12,) + + +def test_iht_estimator_pipeline(): + """Check that we can pass a pipeline containing a classifier. + + Checking if we have a classifier should not be based on inheriting from + `ClassifierMixin`. 
+ + Non-regression test for: + https://github.com/scikit-learn-contrib/imbalanced-learn/pull/1049 + """ + model = make_pipeline(GradientBoostingClassifier(random_state=RND_SEED)) + iht = InstanceHardnessThreshold(estimator=model, random_state=RND_SEED) + X_resampled, y_resampled = iht.fit_resample(X, Y) + assert X_resampled.shape == (12, 2) + assert y_resampled.shape == (12,) diff --git a/imblearn/utils/_available_if.py b/imblearn/utils/_available_if.py index 9b2c5e6db..bca75e735 100644 --- a/imblearn/utils/_available_if.py +++ b/imblearn/utils/_available_if.py @@ -7,7 +7,7 @@ from types import MethodType import sklearn -from sklearn.utils import parse_version +from sklearn.utils.fixes import parse_version sklearn_version = parse_version(sklearn.__version__) diff --git a/imblearn/utils/_metadata_requests.py b/imblearn/utils/_metadata_requests.py index 1150c7d75..c81aa4ff0 100644 --- a/imblearn/utils/_metadata_requests.py +++ b/imblearn/utils/_metadata_requests.py @@ -1086,9 +1086,12 @@ def _serialize(self): def __iter__(self): if self._self_request: - yield "$self_request", RouterMappingPair( - mapping=MethodMapping.from_str("one-to-one"), - router=self._self_request, + yield ( + "$self_request", + RouterMappingPair( + mapping=MethodMapping.from_str("one-to-one"), + router=self._self_request, + ), ) for name, route_mapping in self._route_mappings.items(): yield (name, route_mapping) @@ -1234,7 +1237,7 @@ def __init__(self, name, keys, validate_keys=True): def __get__(self, instance, owner): # we would want to have a method which accepts only the expected args - def func(**kw): + def func(*args, **kw): """Updates the request for provided parameters This docstring is overwritten below. @@ -1253,15 +1256,32 @@ def func(**kw): f"arguments are: {set(self.keys)}" ) - requests = instance._get_metadata_request() + # This makes it possible to use the decorated method as an unbound + # method, for instance when monkeypatching. 
+ # https://github.com/scikit-learn/scikit-learn/issues/28632 + if instance is None: + _instance = args[0] + args = args[1:] + else: + _instance = instance + + # Replicating python's behavior when positional args are given other + # than `self`, and `self` is only allowed if this method is unbound. + if args: + raise TypeError( + f"set_{self.name}_request() takes 0 positional argument but" + f" {len(args)} were given" + ) + + requests = _instance._get_metadata_request() method_metadata_request = getattr(requests, self.name) for prop, alias in kw.items(): if alias is not UNCHANGED: method_metadata_request.add_request(param=prop, alias=alias) - instance._metadata_request = requests + _instance._metadata_request = requests - return instance + return _instance # Now we set the relevant attributes of the function so that it seems # like a normal method to the end user, with known expected arguments. @@ -1525,13 +1545,13 @@ def process_routing(_obj, _method, /, **kwargs): metadata to corresponding methods or corresponding child objects. The object names are those defined in `obj.get_metadata_routing()`. """ - if not _routing_enabled() and not kwargs: + if not kwargs: # If routing is not enabled and kwargs are empty, then we don't have to # try doing any routing, we can simply return a structure which returns # an empty dict on routed_params.ANYTHING.ANY_METHOD. 
class EmptyRequest: def get(self, name, default=None): - return default if default else {} + return Bunch(**{method: dict() for method in METHODS}) def __getitem__(self, name): return Bunch(**{method: dict() for method in METHODS}) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 570427759..2fc893391 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -309,7 +309,7 @@ def check_samplers_sparse(name, sampler_orig): sampler = clone(sampler) X_res, y_res = sampler.fit_resample(X, y) assert sparse.issparse(X_res_sparse) - assert_allclose(X_res_sparse.A, X_res, rtol=1e-5) + assert_allclose(X_res_sparse.toarray(), X_res, rtol=1e-5) assert_allclose(y_res_sparse, y_res) diff --git a/imblearn/utils/tests/test_docstring.py b/imblearn/utils/tests/test_docstring.py index 0109fdb31..4a0753663 100644 --- a/imblearn/utils/tests/test_docstring.py +++ b/imblearn/utils/tests/test_docstring.py @@ -3,11 +3,23 @@ # Authors: Guillaume Lemaitre # License: MIT +import sys +import textwrap + import pytest from imblearn.utils import Substitution from imblearn.utils._docstring import _n_jobs_docstring, _random_state_docstring + +def _dedent_docstring(docstring): + """Compatibility with Python 3.13+. + + xref: https://github.com/python/cpython/issues/81283 + """ + return "\n".join([textwrap.dedent(line) for line in docstring.split("\n")]) + + func_docstring = """A function. 
Parameters @@ -55,6 +67,11 @@ def __init__(self, param_1, param_2): self.param_2 = param_2 +if sys.version_info >= (3, 13): + func_docstring = _dedent_docstring(func_docstring) + cls_docstring = _dedent_docstring(cls_docstring) + + @pytest.mark.parametrize( "obj, obj_docstring", [(func, func_docstring), (cls, cls_docstring)] ) diff --git a/setup.cfg b/setup.cfg index b14c9e447..5cd5d6139 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.12.0 +current_version = 0.12.4 tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? serialize = diff --git a/setup.py b/setup.py index f7856666a..5e26c3480 100755 --- a/setup.py +++ b/setup.py @@ -49,10 +49,10 @@ "Operating System :: POSIX", "Operating System :: Unix", "Operating System :: MacOS", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] PYTHON_REQUIRES = ">=3.8" INSTALL_REQUIRES = (min_deps.tag_to_packages["install"],)