diff --git a/.circleci/config.yml b/.circleci/config.yml
index b5f679af6..8990d3f22 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -24,6 +24,7 @@ jobs:
- NUMPYDOC_VERSION: 'latest'
- SPHINXCONTRIB_BIBTEX_VERSION: 'latest'
- PYDATA_SPHINX_THEME_VERSION: 'latest'
+ - SPHINX_DESIGN_VERSION: 'latest'
steps:
- add_ssh_keys:
fingerprints:
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5c4218dec..98f2b4e11 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -255,7 +255,7 @@ jobs:
- template: build_tools/azure/posix.yml
parameters:
name: macOS
- vmImage: macOS-11
+ vmImage: macOS-12
dependsOn: [linting, git_commit]
condition: |
and(
diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh
index 32699e8a8..9601b44aa 100755
--- a/build_tools/circle/build_doc.sh
+++ b/build_tools/circle/build_doc.sh
@@ -114,6 +114,7 @@ mamba create -n $CONDA_ENV_NAME --yes --quiet \
"$(get_dep sphinxcontrib-bibtex $SPHINXCONTRIB_BIBTEX_VERSION)" \
"$(get_dep sphinx-copybutton $SPHINXCONTRIB_BIBTEX_VERSION)" \
"$(get_dep pydata-sphinx-theme $PYDATA_SPHINX_THEME_VERSION)" \
+ "$(get_dep sphinx-design $SPHINX_DESIGN_VERSION)" \
memory_profiler packaging seaborn pytest coverage compilers tensorflow
source activate $CONDA_ENV_NAME
diff --git a/conftest.py b/conftest.py
index 45a5ce679..0dc6e5a23 100644
--- a/conftest.py
+++ b/conftest.py
@@ -7,7 +7,14 @@
import os
+import numpy as np
import pytest
+from sklearn.utils.fixes import parse_version
+
+# use legacy numpy print options to avoid failures due to the NumPy 2.0+ scalar
+# representation; `>=` so that the 2.0.0 release itself is covered as well
+if parse_version(np.__version__) >= parse_version("2.0.0"):
+ np.set_printoptions(legacy="1.25")
def pytest_runtest_setup(item):
diff --git a/doc/_static/css/imbalanced-learn.css b/doc/_static/css/imbalanced-learn.css
index 6c778540b..3778ee94c 100644
--- a/doc/_static/css/imbalanced-learn.css
+++ b/doc/_static/css/imbalanced-learn.css
@@ -21,39 +21,44 @@
/* Override some aspects of the pydata-sphinx-theme */
-/* Getting started index page */
+/* Main index page overview cards */
.intro-card {
- background: #fff;
- border-radius: 0;
- padding: 30px 10px 10px 10px;
- margin: 10px 0px;
-}
-
-.intro-card .card-text {
- margin: 20px 0px;
- /*min-height: 150px; */
-}
-
-.custom-button {
- background-color: #dcdcdc;
- border: none;
- color: #484848;
- text-align: center;
- text-decoration: none;
- display: inline-block;
- font-size: 0.9rem;
- border-radius: 0.5rem;
+ padding: 30px 10px 20px 10px;
+}
+
+.intro-card .sd-card-img-top {
+ margin: 10px;
+ height: 52px;
+ background: none !important;
+}
+
+.intro-card .sd-card-title {
+ color: var(--pst-color-primary);
+ font-size: var(--pst-font-size-h5);
+ padding: 1rem 0rem 0.5rem 0rem;
+}
+
+.intro-card .sd-card-footer {
+ border: none !important;
+}
+
+.intro-card .sd-card-footer p.sd-card-text {
max-width: 220px;
- padding: 0.5rem 0rem;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.intro-card .sd-btn-secondary {
+ background-color: #6c757d !important;
+ border-color: #6c757d !important;
}
-.custom-button a {
- color: #484848;
+.intro-card .sd-btn-secondary:hover {
+ background-color: #5a6268 !important;
+ border-color: #545b62 !important;
}
-.custom-button p {
- margin-top: 0;
- margin-bottom: 0rem;
- color: #484848;
+.card, .card img {
+ background-color: var(--pst-color-background);
}
diff --git a/doc/_static/img/logo_wide_dark.png b/doc/_static/img/logo_wide_dark.png
new file mode 100644
index 000000000..38f997886
Binary files /dev/null and b/doc/_static/img/logo_wide_dark.png differ
diff --git a/doc/_static/index_api.svg b/doc/_static/index_api.svg
new file mode 100644
index 000000000..69f7ba1d2
--- /dev/null
+++ b/doc/_static/index_api.svg
@@ -0,0 +1,97 @@
+
+
+
+
diff --git a/doc/_static/index_examples.svg b/doc/_static/index_examples.svg
new file mode 100644
index 000000000..de3d90237
--- /dev/null
+++ b/doc/_static/index_examples.svg
@@ -0,0 +1,76 @@
+
+
+
+
diff --git a/doc/_static/index_getting_started.svg b/doc/_static/index_getting_started.svg
new file mode 100644
index 000000000..2d36622cb
--- /dev/null
+++ b/doc/_static/index_getting_started.svg
@@ -0,0 +1,66 @@
+
+
+
+
diff --git a/doc/_static/index_user_guide.svg b/doc/_static/index_user_guide.svg
new file mode 100644
index 000000000..bd1705351
--- /dev/null
+++ b/doc/_static/index_user_guide.svg
@@ -0,0 +1,67 @@
+
+
+
+
diff --git a/doc/conf.py b/doc/conf.py
index a6361eafd..5561808ab 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -43,6 +43,7 @@
"sphinx_issues",
"sphinx_gallery.gen_gallery",
"sphinx_copybutton",
+ "sphinx_design",
]
# Specify how to identify the prompt when copying code snippets
@@ -106,10 +107,12 @@
html_theme_options = {
"external_links": [],
"github_url": "https://github.com/scikit-learn-contrib/imbalanced-learn",
- # "twitter_url": "https://twitter.com/pandas_dev",
"use_edit_page_button": True,
"show_toc_level": 1,
# "navbar_align": "right", # For testing that the navbar items align properly
+ "logo": {
+ "image_dark": "https://imbalanced-learn.org/stable/_static/img/logo_wide_dark.png"
+ },
}
html_context = {
@@ -323,15 +326,7 @@ def generate_min_dependency_substitutions(app):
# -- Additional temporary hacks -----------------------------------------------
-# Temporary work-around for spacing problem between parameter and parameter
-# type in the doc, see https://github.com/numpy/numpydoc/issues/215. The bug
-# has been fixed in sphinx (https://github.com/sphinx-doc/sphinx/pull/5976) but
-# through a change in sphinx basic.css except rtd_theme does not use basic.css.
-# In an ideal world, this would get fixed in this PR:
-# https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files
-
def setup(app):
app.connect("builder-inited", generate_min_dependency_table)
app.connect("builder-inited", generate_min_dependency_substitutions)
- app.add_css_file("basic.css")
diff --git a/doc/index.rst b/doc/index.rst
index aa3d7a9b2..238786314 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -21,80 +21,82 @@ Imbalanced-learn (imported as :mod:`imblearn`) is an open source, MIT-licensed
library relying on scikit-learn (imported as :mod:`sklearn`) and provides tools
when dealing with classification with imbalanced classes.
-.. raw:: html
-
-
-
-
-
-
-
-
Getting started
-
Check out the getting started guides to install imbalanced-learn.
- Some extra information to get started with a new contribution is also provided.
-
-.. container:: custom-button
-
- :ref:`To the installation guideline`
-
-.. raw:: html
-
-
-
-
-
-
-
-
-
User guide
-
The user guide provides in-depth information on the
- key concepts of imbalanced-learn with useful background information and explanation.
-
-.. container:: custom-button
-
- :ref:`To the user guide`
-
-.. raw:: html
-
-
-
-
-
-
-
-
-
API reference
-
The reference guide contains a detailed description of
- the imbalanced-learn API. To known more about methods parameters.
-
-.. container:: custom-button
-
- :ref:`To the reference guide`
-
-.. raw:: html
-
-
-
-
-
-
-
-
-
Examples
-
The gallery of examples is a good place to see imbalanced-learn in action.
- Select an example and dive in.
-
-.. container:: custom-button
-
- :ref:`To the gallery of examples`
-
-.. raw:: html
-
-
-
-
-
-
+.. grid:: 1 2 2 2
+ :gutter: 4
+ :padding: 2 2 0 0
+ :class-container: sd-text-center
+
+ .. grid-item-card:: Getting started
+ :img-top: _static/index_getting_started.svg
+ :class-card: intro-card
+ :shadow: md
+
+ Check out the getting started guides to install `imbalanced-learn`.
+ Some extra information to get started with a new contribution is also provided.
+
+ +++
+
+ .. button-ref:: getting_started
+ :ref-type: ref
+ :click-parent:
+ :color: secondary
+ :expand:
+
+ To the installation guideline
+
+ .. grid-item-card:: User guide
+ :img-top: _static/index_user_guide.svg
+ :class-card: intro-card
+ :shadow: md
+
+ The user guide provides in-depth information on the key concepts of
+ `imbalanced-learn` with useful background information and explanation.
+
+ +++
+
+ .. button-ref:: user_guide
+ :ref-type: ref
+ :click-parent:
+ :color: secondary
+ :expand:
+
+ To the user guide
+
+ .. grid-item-card:: API reference
+ :img-top: _static/index_api.svg
+ :class-card: intro-card
+ :shadow: md
+
+ The reference guide contains a detailed description of
+ the `imbalanced-learn` API. Use it to learn more about method parameters.
+
+ +++
+
+ .. button-ref:: api
+ :ref-type: ref
+ :click-parent:
+ :color: secondary
+ :expand:
+
+ To the reference guide
+
+ .. grid-item-card:: Examples
+ :img-top: _static/index_examples.svg
+ :class-card: intro-card
+ :shadow: md
+
+ The gallery of examples is a good place to see `imbalanced-learn` in action.
+ Select an example and dive in.
+
+ +++
+
+ .. button-ref:: general_examples
+ :ref-type: ref
+ :click-parent:
+ :color: secondary
+ :expand:
+
+ To the gallery of examples
.. toctree::
diff --git a/doc/under_sampling.rst b/doc/under_sampling.rst
index 499b5a3d9..8f8e7fbb8 100644
--- a/doc/under_sampling.rst
+++ b/doc/under_sampling.rst
@@ -497,8 +497,7 @@ The class can be used as::
>>> from sklearn.linear_model import LogisticRegression
>>> from imblearn.under_sampling import InstanceHardnessThreshold
>>> iht = InstanceHardnessThreshold(random_state=0,
- ... estimator=LogisticRegression(
- ... solver='lbfgs', multi_class='auto'))
+ ... estimator=LogisticRegression())
>>> X_resampled, y_resampled = iht.fit_resample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 64), (2, 64)]
diff --git a/doc/whats_new/v0.11.rst b/doc/whats_new/v0.11.rst
index b36d3a902..8f421ee69 100644
--- a/doc/whats_new/v0.11.rst
+++ b/doc/whats_new/v0.11.rst
@@ -1,37 +1,5 @@
.. _changes_0_11:
-Version 0.11.1
-==============
-
-Changelog
----------
-
-Bug fixes
-.........
-
-- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the entries of the
- one-hot encoding should be divided by `sqrt(2)` and not `2`, taking into account that
- they are plugged into an Euclidean distance computation.
- :pr:`1014` by :user:`Guillaume Lemaitre `.
-
-- Raise an informative error message when all support vectors are tagged as noise in
- :class:`~imblearn.over_sampling.SVMSMOTE`.
- :pr:`1016` by :user:`Guillaume Lemaitre `.
-
-- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of standard
- deviation of the continuous features was only computed on the minority class. Now,
- we are computing this statistic for each class that is up-sampled.
- :pr:`1015` by :user:`Guillaume Lemaitre `.
-
-- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` such that the case where
- the median of standard deviation of the continuous features is null is handled
- in the multiclass case as well.
- :pr:`1015` by :user:`Guillaume Lemaitre `.
-
-- Fix a bug in :class:`~imblearn.over_sampling.BorderlineSMOTE` version 2 where samples
- should be generated from the whole dataset and not only from the minority class.
- :pr:`1023` by :user:`Guillaume Lemaitre `.
-
Version 0.11.0
==============
diff --git a/doc/whats_new/v0.12.rst b/doc/whats_new/v0.12.rst
index df9df54a1..fb79497d8 100644
--- a/doc/whats_new/v0.12.rst
+++ b/doc/whats_new/v0.12.rst
@@ -1,6 +1,73 @@
.. _changes_0_12:
-.. _changes_0_12:
+Version 0.12.4
+==============
+
+**October 4, 2024**
+
+Changelog
+---------
+
+Compatibility
+.............
+
+- Compatibility with NumPy 2.0+
+ :pr:`1097` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+Version 0.12.3
+==============
+
+**May 28, 2024**
+
+Changelog
+---------
+
+Compatibility
+.............
+
+- Compatibility with scikit-learn 1.5
+ :pr:`1074` and :pr:`1084` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+Version 0.12.2
+==============
+
+**March 31, 2024**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Fix the way we check for a specific Python version in the test suite.
+ :pr:`1075` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+Version 0.12.1
+==============
+
+**March 31, 2024**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Fix a bug in :class:`~imblearn.under_sampling.InstanceHardnessThreshold` where
+ `estimator` could not be a :class:`~sklearn.pipeline.Pipeline` object.
+ :pr:`1049` by :user:`Gonenc Mogol `.
+
+Compatibility
+.............
+
+- Do not use `distutils` in tests due to deprecation.
+ :pr:`1065` by :user:`Michael R. Crusoe <mr-c>`.
+
+- Fix the scikit-learn import in tests to be compatible with version 1.4.1.post1.
+ :pr:`1073` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fix test to be compatible with Python 3.13.
+ :pr:`1073` by :user:`Guillaume Lemaitre <glemaitre>`.
Version 0.12.0
==============
@@ -13,6 +80,29 @@ Changelog
Bug fixes
.........
+- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the entries of the
+ one-hot encoding should be divided by `sqrt(2)` and not `2`, taking into account that
+ they are plugged into an Euclidean distance computation.
+ :pr:`1014` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Raise an informative error message when all support vectors are tagged as noise in
+ :class:`~imblearn.over_sampling.SVMSMOTE`.
+ :pr:`1016` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of standard
+ deviation of the continuous features was only computed on the minority class. Now,
+ we are computing this statistic for each class that is up-sampled.
+ :pr:`1015` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` such that the case where
+ the median of standard deviation of the continuous features is null is handled
+ in the multiclass case as well.
+ :pr:`1015` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fix a bug in :class:`~imblearn.over_sampling.BorderlineSMOTE` version 2 where samples
+ should be generated from the whole dataset and not only from the minority class.
+ :pr:`1023` by :user:`Guillaume Lemaitre <glemaitre>`.
+
- Fix a bug in :class:`~imblearn.under_sampling.NeighbourhoodCleaningRule` where the
`kind_sel="all"` was not working as explained in the literature.
:pr:`1012` by :user:`Guillaume Lemaitre `.
diff --git a/examples/api/plot_sampling_strategy_usage.py b/examples/api/plot_sampling_strategy_usage.py
index dbb52fcdf..1c76a06b2 100644
--- a/examples/api/plot_sampling_strategy_usage.py
+++ b/examples/api/plot_sampling_strategy_usage.py
@@ -129,7 +129,7 @@
# %% [markdown]
# `sampling_strategy` as a `dict`
-# ------------------------------
+# -------------------------------
#
# When `sampling_strategy` is a `dict`, the keys correspond to the targeted
# classes. The values correspond to the desired number of samples for each
diff --git a/examples/applications/plot_outlier_rejections.py b/examples/applications/plot_outlier_rejections.py
index 55f03e273..985b9211a 100644
--- a/examples/applications/plot_outlier_rejections.py
+++ b/examples/applications/plot_outlier_rejections.py
@@ -109,12 +109,12 @@ def outlier_rejection(X, y):
pipe = make_pipeline(
FunctionSampler(func=outlier_rejection),
- LogisticRegression(solver="lbfgs", multi_class="auto", random_state=rng),
+ LogisticRegression(random_state=rng),
)
y_pred = pipe.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))
-clf = LogisticRegression(solver="lbfgs", multi_class="auto", random_state=rng)
+clf = LogisticRegression(random_state=rng)
y_pred = clf.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))
diff --git a/examples/applications/porto_seguro_keras_under_sampling.py b/examples/applications/porto_seguro_keras_under_sampling.py
index ee8a6e2f0..9175427fa 100644
--- a/examples/applications/porto_seguro_keras_under_sampling.py
+++ b/examples/applications/porto_seguro_keras_under_sampling.py
@@ -151,7 +151,7 @@ def wrapper(*args, **kwds):
# mini-batches.
import tensorflow
from sklearn.metrics import roc_auc_score
-from sklearn.utils import parse_version
+from sklearn.utils.fixes import parse_version
tf_version = parse_version(tensorflow.__version__)
diff --git a/examples/ensemble/plot_comparison_ensemble_classifier.py b/examples/ensemble/plot_comparison_ensemble_classifier.py
index 602e477e5..8c318e5bc 100644
--- a/examples/ensemble/plot_comparison_ensemble_classifier.py
+++ b/examples/ensemble/plot_comparison_ensemble_classifier.py
@@ -197,7 +197,7 @@
from imblearn.ensemble import EasyEnsembleClassifier, RUSBoostClassifier
-estimator = AdaBoostClassifier(n_estimators=10)
+estimator = AdaBoostClassifier(n_estimators=10, algorithm="SAMME")
eec = EasyEnsembleClassifier(n_estimators=10, estimator=estimator)
eec.fit(X_train, y_train)
y_pred_eec = eec.predict(X_test)
diff --git a/imblearn/_config.py b/imblearn/_config.py
index 4c093db09..ef98e7305 100644
--- a/imblearn/_config.py
+++ b/imblearn/_config.py
@@ -7,7 +7,7 @@
from contextlib import contextmanager as contextmanager
import sklearn
-from sklearn.utils import parse_version
+from sklearn.utils.fixes import parse_version
sklearn_version = parse_version(sklearn.__version__)
diff --git a/imblearn/_min_dependencies.py b/imblearn/_min_dependencies.py
index 497688765..ec1f5dedb 100644
--- a/imblearn/_min_dependencies.py
+++ b/imblearn/_min_dependencies.py
@@ -37,6 +37,7 @@
"numpydoc": ("1.5.0", "docs"),
"sphinxcontrib-bibtex": ("2.4.1", "docs"),
"pydata-sphinx-theme": ("0.13.3", "docs"),
+ "sphinx-design": ("0.5.0", "docs"),
}
diff --git a/imblearn/_version.py b/imblearn/_version.py
index c0fef945a..ff7e11ace 100644
--- a/imblearn/_version.py
+++ b/imblearn/_version.py
@@ -22,4 +22,4 @@
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#
-__version__ = "0.12.0"
+__version__ = "0.12.4"
diff --git a/imblearn/base.py b/imblearn/base.py
index e529fead6..0b2d94e84 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -17,7 +17,7 @@
from sklearn.base import _OneToOneFeatureMixin as OneToOneFeatureMixin
from sklearn.preprocessing import label_binarize
-from sklearn.utils import parse_version
+from sklearn.utils.fixes import parse_version
from sklearn.utils.multiclass import check_classification_targets
from .utils import check_sampling_strategy, check_target_type
diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py
index afcf3fd3a..acb0c70fa 100644
--- a/imblearn/ensemble/_bagging.py
+++ b/imblearn/ensemble/_bagging.py
@@ -16,7 +16,7 @@
from sklearn.ensemble._base import _partition_estimators
from sklearn.exceptions import NotFittedError
from sklearn.tree import DecisionTreeClassifier
-from sklearn.utils import parse_version
+from sklearn.utils.fixes import parse_version
from sklearn.utils.validation import check_is_fitted
try:
@@ -386,7 +386,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
self.sampler_ = clone(self.sampler)
# RandomUnderSampler is not supporting sample_weight. We need to pass
# None.
- return super()._fit(X, y, self.max_samples, sample_weight=None)
+ return super()._fit(X, y, self.max_samples)
# TODO: remove when minimum supported version of scikit-learn is 1.1
@available_if(_estimator_has("decision_function"))
diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py
index db3c6cbb7..e3c85741c 100644
--- a/imblearn/ensemble/_easy_ensemble.py
+++ b/imblearn/ensemble/_easy_ensemble.py
@@ -15,8 +15,8 @@
from sklearn.ensemble._bagging import _parallel_decision_function
from sklearn.ensemble._base import _partition_estimators
from sklearn.exceptions import NotFittedError
-from sklearn.utils import parse_version
from sklearn.utils._tags import _safe_tags
+from sklearn.utils.fixes import parse_version
from sklearn.utils.validation import check_is_fitted
try:
@@ -300,7 +300,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
check_target_type(y)
# RandomUnderSampler is not supporting sample_weight. We need to pass
# None.
- return super()._fit(X, y, self.max_samples, sample_weight=None)
+ return super()._fit(X, y, self.max_samples)
# TODO: remove when minimum supported version of scikit-learn is 1.1
@available_if(_estimator_has("decision_function"))
@@ -365,9 +365,11 @@ def base_estimator_(self):
raise error
raise error
- def _more_tags(self):
+ def _get_estimator(self):
if self.estimator is None:
- estimator = AdaBoostClassifier(algorithm="SAMME")
- else:
- estimator = self.estimator
- return {"allow_nan": _safe_tags(estimator, "allow_nan")}
+ return AdaBoostClassifier(algorithm="SAMME")
+ return self.estimator
+
+ # TODO: remove when minimum supported version of scikit-learn is 1.5
+ def _more_tags(self):
+ return {"allow_nan": _safe_tags(self._get_estimator(), "allow_nan")}
diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py
index a7c8f9beb..5f8d08e91 100644
--- a/imblearn/ensemble/_forest.py
+++ b/imblearn/ensemble/_forest.py
@@ -22,7 +22,8 @@
)
from sklearn.exceptions import DataConversionWarning
from sklearn.tree import DecisionTreeClassifier
-from sklearn.utils import _safe_indexing, check_random_state, parse_version
+from sklearn.utils import _safe_indexing, check_random_state
+from sklearn.utils.fixes import parse_version
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.validation import _check_sample_weight
diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py
index 539b7824f..9da02255e 100644
--- a/imblearn/ensemble/_weight_boosting.py
+++ b/imblearn/ensemble/_weight_boosting.py
@@ -8,7 +8,8 @@
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.tree import DecisionTreeClassifier
-from sklearn.utils import _safe_indexing, parse_version
+from sklearn.utils import _safe_indexing
+from sklearn.utils.fixes import parse_version
from sklearn.utils.validation import has_fit_parameter
from ..base import _ParamsValidationMixin
diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py
index 5705de553..382597183 100644
--- a/imblearn/ensemble/tests/test_bagging.py
+++ b/imblearn/ensemble/tests/test_bagging.py
@@ -174,7 +174,7 @@ def test_probability():
# Degenerate case, where some classes are missing
ensemble = BalancedBaggingClassifier(
- estimator=LogisticRegression(solver="lbfgs", multi_class="auto"),
+ estimator=LogisticRegression(solver="lbfgs"),
random_state=0,
max_samples=5,
)
@@ -435,7 +435,7 @@ def test_estimators_samples():
# remap the y outside of the BalancedBaggingclassifier
# _, y = np.unique(y, return_inverse=True)
bagging = BalancedBaggingClassifier(
- LogisticRegression(solver="lbfgs", multi_class="auto"),
+ LogisticRegression(),
max_samples=0.5,
max_features=0.5,
random_state=1,
diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
index f49ecd0aa..a073d846d 100644
--- a/imblearn/keras/tests/test_generator.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -70,7 +70,7 @@ def test_balanced_batch_generator_class(data, sampler, sample_weight):
batch_size=10,
random_state=42,
)
- model.fit_generator(generator=training_generator, epochs=10)
+ model.fit(training_generator, epochs=10)
@pytest.mark.parametrize("keep_sparse", [True, False])
@@ -122,8 +122,8 @@ def test_balanced_batch_generator_function(data, sampler, sample_weight):
batch_size=10,
random_state=42,
)
- model.fit_generator(
- generator=training_generator,
+ model.fit(
+ training_generator,
steps_per_epoch=steps_per_epoch,
epochs=10,
)
diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py
index 11f654f02..40f099258 100644
--- a/imblearn/metrics/pairwise.py
+++ b/imblearn/metrics/pairwise.py
@@ -161,7 +161,7 @@ def fit(self, X, y):
f"elements in n_categories and {self.n_features_in_} in "
f"X."
)
- self.n_categories_ = np.array(self.n_categories, copy=False)
+ self.n_categories_ = np.asarray(self.n_categories)
classes = unique_labels(y)
# list of length n_features of ndarray (n_categories, n_classes)
diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py
index 93b7e8a7b..8ef902920 100644
--- a/imblearn/over_sampling/_smote/base.py
+++ b/imblearn/over_sampling/_smote/base.py
@@ -11,16 +11,17 @@
import warnings
import numpy as np
+import sklearn
from scipy import sparse
from sklearn.base import clone
from sklearn.exceptions import DataConversionWarning
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.utils import (
- _get_column_indices,
_safe_indexing,
check_array,
check_random_state,
)
+from sklearn.utils.fixes import parse_version
from sklearn.utils.sparsefuncs_fast import (
csr_mean_variance_axis0,
)
@@ -34,6 +35,12 @@
from ...utils.fixes import _is_pandas_df, _mode
from ..base import BaseOverSampler
+sklearn_version = parse_version(sklearn.__version__).base_version
+if parse_version(sklearn_version) < parse_version("1.5"):
+ from sklearn.utils import _get_column_indices
+else:
+ from sklearn.utils._indexing import _get_column_indices
+
class BaseSMOTE(BaseOverSampler):
"""Base class for the different SMOTE algorithms."""
diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
index 01eead7ea..7453446ad 100644
--- a/imblearn/pipeline.py
+++ b/imblearn/pipeline.py
@@ -12,9 +12,11 @@
# Christos Aridas
# Guillaume Lemaitre
# License: BSD
+import sklearn
from sklearn import pipeline
from sklearn.base import clone
-from sklearn.utils import Bunch, _print_elapsed_time
+from sklearn.utils import Bunch
+from sklearn.utils.fixes import parse_version
from sklearn.utils.metaestimators import available_if
from sklearn.utils.validation import check_memory
@@ -34,6 +36,12 @@
__all__ = ["Pipeline", "make_pipeline"]
+sklearn_version = parse_version(sklearn.__version__).base_version
+if parse_version(sklearn_version) < parse_version("1.5"):
+ from sklearn.utils import _print_elapsed_time
+else:
+ from sklearn.utils._user_interface import _print_elapsed_time
+
class Pipeline(_ParamsValidationMixin, pipeline.Pipeline):
"""Pipeline of transforms and resamples with a final estimator.
@@ -163,11 +171,12 @@ def _validate_steps(self):
for t in transformers:
if t is None or t == "passthrough":
continue
- if not (
- hasattr(t, "fit")
- or hasattr(t, "fit_transform")
- or hasattr(t, "fit_resample")
- ) or not (hasattr(t, "transform") or hasattr(t, "fit_resample")):
+
+ is_transfomer = hasattr(t, "fit") and hasattr(t, "transform")
+ is_sampler = hasattr(t, "fit_resample")
+ is_not_transfomer_or_sampler = not (is_transfomer or is_sampler)
+
+ if is_not_transfomer_or_sampler:
raise TypeError(
"All intermediate steps of the chain should "
"be estimators that implement fit and transform or "
@@ -175,9 +184,7 @@ def _validate_steps(self):
"'%s' (type %s) doesn't)" % (t, type(t))
)
- if hasattr(t, "fit_resample") and (
- hasattr(t, "fit_transform") or hasattr(t, "transform")
- ):
+ if is_transfomer and is_sampler:
raise TypeError(
"All intermediate steps of the chain should "
"be estimators that implement fit and transform or "
diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py
index bcc10b8f1..e0c7a9103 100644
--- a/imblearn/tensorflow/tests/test_generator.py
+++ b/imblearn/tensorflow/tests/test_generator.py
@@ -1,9 +1,8 @@
-from distutils.version import LooseVersion
-
import numpy as np
import pytest
from scipy import sparse
from sklearn.datasets import load_iris
+from sklearn.utils.fixes import parse_version
from imblearn.datasets import make_imbalance
from imblearn.over_sampling import RandomOverSampler
@@ -147,7 +146,7 @@ def accuracy(y_true, y_pred):
@pytest.mark.parametrize("sampler", [None, NearMiss(), RandomOverSampler()])
def test_balanced_batch_generator(data, sampler):
- if LooseVersion(tf.__version__) < "2":
+ if parse_version(tf.__version__) < parse_version("2.0.0"):
check_balanced_batch_generator_tf_1_X_X(data, sampler)
else:
check_balanced_batch_generator_tf_2_X_X_compat_1_X_X(data, sampler)
diff --git a/imblearn/tests/test_docstring_parameters.py b/imblearn/tests/test_docstring_parameters.py
index b595d77d7..1bd6ecf51 100644
--- a/imblearn/tests/test_docstring_parameters.py
+++ b/imblearn/tests/test_docstring_parameters.py
@@ -11,7 +11,6 @@
import pytest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
-from sklearn.utils import IS_PYPY
from sklearn.utils._testing import (
_get_func_name,
check_docstring_parameters,
@@ -70,7 +69,6 @@
# Python 3.7
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-@pytest.mark.skipif(IS_PYPY, reason="test segfaults on PyPy")
def test_docstring_parameters():
# Test module docstring formatting
@@ -154,9 +152,6 @@ def test_tabs():
for importer, modname, ispkg in walk_packages(
imblearn.__path__, prefix="imblearn."
):
- if IS_PYPY:
- continue
-
# because we don't import
mod = importlib.import_module(modname)
diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py
index c39758d9f..d89e03a11 100644
--- a/imblearn/tests/test_pipeline.py
+++ b/imblearn/tests/test_pipeline.py
@@ -272,7 +272,7 @@ def test_pipeline_methods_anova():
X = iris.data
y = iris.target
# Test with Anova + LogisticRegression
- clf = LogisticRegression(solver="lbfgs", multi_class="auto")
+ clf = LogisticRegression()
filter1 = SelectKBest(f_classif, k=2)
pipe = Pipeline([("anova", filter1), ("logistic", clf)])
pipe.fit(X, y)
@@ -410,7 +410,7 @@ def test_fit_predict_on_pipeline_without_fit_predict():
scaler = StandardScaler()
pca = PCA(svd_solver="full")
pipe = Pipeline([("scaler", scaler), ("pca", pca)])
- error_regex = "'PCA' object has no attribute 'fit_predict'"
+ error_regex = "has no attribute 'fit_predict'"
with raises(AttributeError, match=error_regex):
getattr(pipe, "fit_predict")
@@ -639,7 +639,7 @@ def test_classes_property():
clf = make_pipeline(
SelectKBest(k=1),
- LogisticRegression(solver="lbfgs", multi_class="auto", random_state=0),
+ LogisticRegression(),
)
with raises(AttributeError):
getattr(clf, "classes_")
@@ -1219,7 +1219,7 @@ def test_score_samples_on_pipeline_without_score_samples():
pipe.fit(X, y)
with pytest.raises(
AttributeError,
- match="'LogisticRegression' object has no attribute 'score_samples'",
+ match="has no attribute 'score_samples'",
):
pipe.score_samples(X)
diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py
index 52d9280b6..dac3f3c33 100644
--- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py
+++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py
@@ -10,7 +10,7 @@
from collections import Counter
import numpy as np
-from sklearn.base import ClassifierMixin, clone
+from sklearn.base import clone, is_classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.model_selection import StratifiedKFold, cross_val_predict
@@ -140,7 +140,7 @@ def _validate_estimator(self, random_state):
if (
self.estimator is not None
- and isinstance(self.estimator, ClassifierMixin)
+ and is_classifier(self.estimator)
and hasattr(self.estimator, "predict_proba")
):
self.estimator_ = clone(self.estimator)
diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py
index 5d7008747..a63bb45a0 100644
--- a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py
+++ b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py
@@ -6,6 +6,7 @@
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB as NB
+from sklearn.pipeline import make_pipeline
from sklearn.utils._testing import assert_array_equal
from imblearn.under_sampling import InstanceHardnessThreshold
@@ -93,3 +94,19 @@ def test_iht_fit_resample_default_estimator():
assert isinstance(iht.estimator_, RandomForestClassifier)
assert X_resampled.shape == (12, 2)
assert y_resampled.shape == (12,)
+
+
+def test_iht_estimator_pipeline():
+ """Check that we can pass a pipeline containing a classifier.
+
+ The estimator is validated with `is_classifier` rather than by checking
+ that it inherits from `ClassifierMixin`, so a `Pipeline` is accepted.
+
+ Non-regression test for:
+ https://github.com/scikit-learn-contrib/imbalanced-learn/pull/1049
+ """
+ model = make_pipeline(GradientBoostingClassifier(random_state=RND_SEED))
+ iht = InstanceHardnessThreshold(estimator=model, random_state=RND_SEED)
+ X_resampled, y_resampled = iht.fit_resample(X, Y)
+ assert X_resampled.shape == (12, 2)
+ assert y_resampled.shape == (12,)
diff --git a/imblearn/utils/_available_if.py b/imblearn/utils/_available_if.py
index 9b2c5e6db..bca75e735 100644
--- a/imblearn/utils/_available_if.py
+++ b/imblearn/utils/_available_if.py
@@ -7,7 +7,7 @@
from types import MethodType
import sklearn
-from sklearn.utils import parse_version
+from sklearn.utils.fixes import parse_version
sklearn_version = parse_version(sklearn.__version__)
diff --git a/imblearn/utils/_metadata_requests.py b/imblearn/utils/_metadata_requests.py
index 1150c7d75..c81aa4ff0 100644
--- a/imblearn/utils/_metadata_requests.py
+++ b/imblearn/utils/_metadata_requests.py
@@ -1086,9 +1086,12 @@ def _serialize(self):
def __iter__(self):
if self._self_request:
- yield "$self_request", RouterMappingPair(
- mapping=MethodMapping.from_str("one-to-one"),
- router=self._self_request,
+ yield (
+ "$self_request",
+ RouterMappingPair(
+ mapping=MethodMapping.from_str("one-to-one"),
+ router=self._self_request,
+ ),
)
for name, route_mapping in self._route_mappings.items():
yield (name, route_mapping)
@@ -1234,7 +1237,7 @@ def __init__(self, name, keys, validate_keys=True):
def __get__(self, instance, owner):
# we would want to have a method which accepts only the expected args
- def func(**kw):
+ def func(*args, **kw):
"""Updates the request for provided parameters
This docstring is overwritten below.
@@ -1253,15 +1256,32 @@ def func(**kw):
f"arguments are: {set(self.keys)}"
)
- requests = instance._get_metadata_request()
+ # This makes it possible to use the decorated method as an unbound
+ # method, for instance when monkeypatching.
+ # https://github.com/scikit-learn/scikit-learn/issues/28632
+ if instance is None:
+ _instance = args[0]
+ args = args[1:]
+ else:
+ _instance = instance
+
+ # Replicating python's behavior when positional args are given other
+ # than `self`, and `self` is only allowed if this method is unbound.
+ if args:
+ raise TypeError(
+ f"set_{self.name}_request() takes 0 positional argument but"
+ f" {len(args)} were given"
+ )
+
+ requests = _instance._get_metadata_request()
method_metadata_request = getattr(requests, self.name)
for prop, alias in kw.items():
if alias is not UNCHANGED:
method_metadata_request.add_request(param=prop, alias=alias)
- instance._metadata_request = requests
+ _instance._metadata_request = requests
- return instance
+ return _instance
# Now we set the relevant attributes of the function so that it seems
# like a normal method to the end user, with known expected arguments.
@@ -1525,13 +1545,13 @@ def process_routing(_obj, _method, /, **kwargs):
metadata to corresponding methods or corresponding child objects. The object
names are those defined in `obj.get_metadata_routing()`.
"""
- if not _routing_enabled() and not kwargs:
+ if not kwargs:
- # If routing is not enabled and kwargs are empty, then we don't have to
+ # If kwargs are empty, then we don't have to
# try doing any routing, we can simply return a structure which returns
# an empty dict on routed_params.ANYTHING.ANY_METHOD.
class EmptyRequest:
def get(self, name, default=None):
- return default if default else {}
+ return Bunch(**{method: dict() for method in METHODS})
def __getitem__(self, name):
return Bunch(**{method: dict() for method in METHODS})
diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 570427759..2fc893391 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -309,7 +309,7 @@ def check_samplers_sparse(name, sampler_orig):
sampler = clone(sampler)
X_res, y_res = sampler.fit_resample(X, y)
assert sparse.issparse(X_res_sparse)
- assert_allclose(X_res_sparse.A, X_res, rtol=1e-5)
+ assert_allclose(X_res_sparse.toarray(), X_res, rtol=1e-5)
assert_allclose(y_res_sparse, y_res)
diff --git a/imblearn/utils/tests/test_docstring.py b/imblearn/utils/tests/test_docstring.py
index 0109fdb31..4a0753663 100644
--- a/imblearn/utils/tests/test_docstring.py
+++ b/imblearn/utils/tests/test_docstring.py
@@ -3,11 +3,23 @@
# Authors: Guillaume Lemaitre
# License: MIT
+import sys
+import textwrap
+
import pytest
from imblearn.utils import Substitution
from imblearn.utils._docstring import _n_jobs_docstring, _random_state_docstring
+
+def _dedent_docstring(docstring):
+ """Dedent each line of `docstring` for Python 3.13+ compatibility.
+
+ xref: https://github.com/python/cpython/issues/81283
+ """
+ return "\n".join([textwrap.dedent(line) for line in docstring.split("\n")])
+
+
func_docstring = """A function.
Parameters
@@ -55,6 +67,11 @@ def __init__(self, param_1, param_2):
self.param_2 = param_2
+if sys.version_info >= (3, 13):
+ func_docstring = _dedent_docstring(func_docstring)
+ cls_docstring = _dedent_docstring(cls_docstring)
+
+
@pytest.mark.parametrize(
"obj, obj_docstring", [(func, func_docstring), (cls, cls_docstring)]
)
diff --git a/setup.cfg b/setup.cfg
index b14c9e447..5cd5d6139 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.12.0
+current_version = 0.12.4
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
diff --git a/setup.py b/setup.py
index f7856666a..5e26c3480 100755
--- a/setup.py
+++ b/setup.py
@@ -49,10 +49,10 @@
"Operating System :: POSIX",
"Operating System :: Unix",
"Operating System :: MacOS",
- "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
]
PYTHON_REQUIRES = ">=3.8"
INSTALL_REQUIRES = (min_deps.tag_to_packages["install"],)