Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
# Linux environment to test the latest available dependencies and MKL.
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
COVERAGE: 'true'
PANDAS_VERSION: '*'
TEST_DOCSTRINGS: 'true'
Expand Down
2 changes: 1 addition & 1 deletion build_tools/azure/test_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ except ImportError:
python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())"
pip list

TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML"
TEST_CMD="python -m pytest -vsl --durations=20 --junitxml=$JUNITXML"

if [[ "$COVERAGE" == "true" ]]; then
export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc"
Expand Down
7 changes: 5 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
'sphinx_gallery.gen_gallery',
]

# bibtex file
bibtex_bibfiles = ['bibtex/refs.bib']

# this is needed for some reason...
# see https://github.com/numpy/numpydoc/issues/69
numpydoc_show_class_members = False
Expand Down Expand Up @@ -345,8 +348,8 @@ def patch_signature(subject, bound_method=False, follow_wrapped=True):
# https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files
def setup(app):
app.registry.documenters["class"] = PatchedClassDocumenter
app.add_javascript("js/copybutton.js")
app.add_stylesheet("basic.css")
app.add_js_file("js/copybutton.js")
app.add_css_file("basic.css")
# app.connect('autodoc-process-docstring', generate_example_rst)


Expand Down
2 changes: 1 addition & 1 deletion doc/over_sampling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ In addition, :class:`RandomOverSampler` allows sampling heterogeneous data

>>> import numpy as np
>>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
... dtype=np.object)
... dtype=object)
>>> y_hetero = np.array([0, 0, 1])
>>> X_resampled, y_resampled = ros.fit_resample(X_hetero, y_hetero)
>>> print(X_resampled)
Expand Down
2 changes: 1 addition & 1 deletion doc/under_sampling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ In addition, :class:`RandomUnderSampler` allows sampling heterogeneous data
(e.g. containing some strings)::

>>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
... dtype=np.object)
... dtype=object)
>>> y_hetero = np.array([0, 0, 1])
>>> X_resampled, y_resampled = rus.fit_resample(X_hetero, y_hetero)
>>> print(X_resampled)
Expand Down
2 changes: 1 addition & 1 deletion imblearn/over_sampling/tests/test_random_over_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_multiclass_fit_resample():

def test_random_over_sampling_heterogeneous_data():
X_hetero = np.array(
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=np.object
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=object
)
y = np.array([0, 0, 1])
ros = RandomOverSampler(random_state=RND_SEED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test_multiclass_fit_resample():

def test_random_under_sampling_heterogeneous_data():
X_hetero = np.array(
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=np.object
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=object
)
y = np.array([0, 0, 1])
rus = RandomUnderSampler(random_state=RND_SEED)
Expand Down
72 changes: 43 additions & 29 deletions imblearn/utils/estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
from sklearn.cluster import KMeans
from sklearn.exceptions import SkipTestWarning
from sklearn.preprocessing import label_binarize
from sklearn.utils.estimator_checks import _mark_xfail_checks
from sklearn.utils.estimator_checks import _set_check_estimator_ids
from sklearn.utils.estimator_checks import _maybe_mark_xfail
from sklearn.utils.estimator_checks import _get_check_estimator_ids
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils.multiclass import type_of_target
Expand All @@ -44,7 +44,7 @@ def _set_checking_parameters(estimator):
if name == "ClusterCentroids":
estimator.set_params(
voting="soft",
estimator=KMeans(random_state=0, algorithm="full"),
estimator=KMeans(random_state=0, algorithm="full", n_init=1),
)
if name == "KMeansSMOTE":
estimator.set_params(kmeans_estimator=12)
Expand Down Expand Up @@ -117,21 +117,19 @@ def parametrize_with_checks(estimators):
... def test_sklearn_compatible_estimator(estimator, check):
... check(estimator)
"""
names = (type(estimator).__name__ for estimator in estimators)
def checks_generator():
for estimator in estimators:
name = type(estimator).__name__
for check in _yield_all_checks(estimator):
check = partial(check, name)
yield _maybe_mark_xfail(estimator, check, pytest)

checks_generator = ((clone(estimator), partial(check, name))
for name, estimator in zip(names, estimators)
for check in _yield_all_checks(estimator))
return pytest.mark.parametrize("estimator, check", checks_generator(),
ids=_get_check_estimator_ids)

checks_with_marks = (
_mark_xfail_checks(estimator, check, pytest)
for estimator, check in checks_generator)

return pytest.mark.parametrize("estimator, check", checks_with_marks,
ids=_set_check_estimator_ids)


def check_target_type(name, estimator):
def check_target_type(name, estimator_orig):
estimator = clone(estimator_orig)
# should raise warning if the target is continuous (we cannot raise error)
X = np.random.random((20, 2))
y = np.linspace(0, 1, 20)
Expand All @@ -148,7 +146,8 @@ def check_target_type(name, estimator):
)


def check_samplers_one_label(name, sampler):
def check_samplers_one_label(name, sampler_orig):
sampler = clone(sampler_orig)
error_string_fit = "Sampler can't balance when only one class is present."
X = np.random.random((20, 2))
y = np.zeros(20)
Expand All @@ -168,7 +167,8 @@ def check_samplers_one_label(name, sampler):
raise AssertionError(error_string_fit)


def check_samplers_fit(name, sampler):
def check_samplers_fit(name, sampler_orig):
sampler = clone(sampler_orig)
np.random.seed(42) # Make this test reproducible
X = np.random.random((30, 2))
y = np.array([1] * 20 + [0] * 10)
Expand All @@ -178,7 +178,8 @@ def check_samplers_fit(name, sampler):
), "No fitted attribute sampling_strategy_"


def check_samplers_fit_resample(name, sampler):
def check_samplers_fit_resample(name, sampler_orig):
sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=1000,
n_classes=3,
Expand Down Expand Up @@ -213,7 +214,8 @@ def check_samplers_fit_resample(name, sampler):
)


def check_samplers_sampling_strategy_fit_resample(name, sampler):
def check_samplers_sampling_strategy_fit_resample(name, sampler_orig):
sampler = clone(sampler_orig)
# in this test we will force all samplers to not change the class 1
X, y = make_classification(
n_samples=1000,
Expand All @@ -240,7 +242,8 @@ def check_samplers_sampling_strategy_fit_resample(name, sampler):
assert Counter(y_res)[1] == expected_stat


def check_samplers_sparse(name, sampler):
def check_samplers_sparse(name, sampler_orig):
sampler = clone(sampler_orig)
# check that sparse matrices can be passed through the sampler, leading to
# the same results as dense
X, y = make_classification(
Expand All @@ -252,14 +255,16 @@ def check_samplers_sparse(name, sampler):
)
X_sparse = sparse.csr_matrix(X)
X_res_sparse, y_res_sparse = sampler.fit_resample(X_sparse, y)
sampler = clone(sampler)
X_res, y_res = sampler.fit_resample(X, y)
assert sparse.issparse(X_res_sparse)
assert_allclose(X_res_sparse.A, X_res)
assert_allclose(X_res_sparse.A, X_res, rtol=1e-5)
assert_allclose(y_res_sparse, y_res)


def check_samplers_pandas(name, sampler):
def check_samplers_pandas(name, sampler_orig):
pd = pytest.importorskip("pandas")
sampler = clone(sampler_orig)
# Check that the samplers handle pandas dataframe and pandas series
X, y = make_classification(
n_samples=1000,
Expand Down Expand Up @@ -290,7 +295,8 @@ def check_samplers_pandas(name, sampler):
assert_allclose(y_res_s.to_numpy(), y_res)


def check_samplers_list(name, sampler):
def check_samplers_list(name, sampler_orig):
sampler = clone(sampler_orig)
# Check that the samplers can handle simple lists
X, y = make_classification(
n_samples=1000,
Expand All @@ -312,7 +318,8 @@ def check_samplers_list(name, sampler):
assert_allclose(y_res, y_res_list)


def check_samplers_multiclass_ova(name, sampler):
def check_samplers_multiclass_ova(name, sampler_orig):
sampler = clone(sampler_orig)
# Check that multiclass targets lead to the same results as OVA encoding
X, y = make_classification(
n_samples=1000,
Expand All @@ -329,7 +336,8 @@ def check_samplers_multiclass_ova(name, sampler):
assert_allclose(y_res, y_res_ova.argmax(axis=1))


def check_samplers_2d_target(name, sampler):
def check_samplers_2d_target(name, sampler_orig):
sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=100,
n_classes=3,
Expand All @@ -342,7 +350,8 @@ def check_samplers_2d_target(name, sampler):
sampler.fit_resample(X, y)


def check_samplers_preserve_dtype(name, sampler):
def check_samplers_preserve_dtype(name, sampler_orig):
sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=1000,
n_classes=3,
Expand All @@ -358,7 +367,8 @@ def check_samplers_preserve_dtype(name, sampler):
assert y.dtype == y_res.dtype, "y dtype is not preserved"


def check_samplers_sample_indices(name, sampler):
def check_samplers_sample_indices(name, sampler_orig):
sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=1000,
n_classes=3,
Expand All @@ -374,17 +384,21 @@ def check_samplers_sample_indices(name, sampler):
assert not hasattr(sampler, "sample_indices_")


def check_classifier_on_multilabel_or_multioutput_targets(name, estimator):
def check_classifier_on_multilabel_or_multioutput_targets(
name, estimator_orig
):
estimator = clone(estimator_orig)
X, y = make_multilabel_classification(n_samples=30)
msg = "Multilabel and multioutput targets are not supported."
with pytest.raises(ValueError, match=msg):
estimator.fit(X, y)


def check_classifiers_with_encoded_labels(name, classifier):
def check_classifiers_with_encoded_labels(name, classifier_orig):
# Non-regression test for #709
# https://github.com/scikit-learn-contrib/imbalanced-learn/issues/709
pytest.importorskip("pandas")
classifier = clone(classifier_orig)
df, y = fetch_openml("iris", version=1, as_frame=True, return_X_y=True)
df, y = make_imbalance(
df, y, sampling_strategy={
Expand Down