From e34aa0ea0223205261d2852f8435de493a9e91f9 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Mon, 13 Jan 2020 13:59:06 +0100
Subject: [PATCH] remove _refresh_cache

---
 sklearn/datasets/_base.py                  | 28 ------------
 sklearn/datasets/_california_housing.py    |  5 +-
 sklearn/datasets/_covtype.py               |  7 +--
 sklearn/datasets/_kddcup99.py              |  7 +--
 sklearn/datasets/_olivetti_faces.py        |  5 +-
 sklearn/datasets/_rcv1.py                  | 13 ++----
 sklearn/datasets/_species_distributions.py |  5 +-
 sklearn/datasets/tests/test_base.py        | 53 ----------------------
 8 files changed, 11 insertions(+), 112 deletions(-)

diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index bdb5cfe8772ca..62bffb947a8ff 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -919,31 +919,3 @@ def _fetch_remote(remote, dirname=None):
                       "file may be corrupted.".format(file_path, checksum,
                                                       remote.checksum))
     return file_path
-
-
-def _refresh_cache(files, compress):
-    # TODO: REMOVE in v0.23
-    import joblib
-    msg = "sklearn.externals.joblib is deprecated in 0.21"
-    with warnings.catch_warnings(record=True) as warns:
-        data = tuple([joblib.load(f) for f in files])
-
-    refresh_needed = any([str(x.message).startswith(msg) for x in warns])
-
-    other_warns = [w for w in warns if not str(w.message).startswith(msg)]
-    for w in other_warns:
-        warnings.warn(message=w.message, category=w.category)
-
-    if refresh_needed:
-        try:
-            for value, path in zip(data, files):
-                joblib.dump(value, path, compress=compress)
-        except IOError:
-            message = ("This dataset will stop being loadable in scikit-learn "
-                       "version 0.23 because it references a deprecated "
-                       "import path. Consider removing the following files "
-                       "and allowing it to be cached anew:\n%s"
-                       % ("\n".join(files)))
-            warnings.warn(message=message, category=FutureWarning)
-
-    return data[0] if len(data) == 1 else data
diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py
index c71ebf3871b75..958184369b63d 100644
--- a/sklearn/datasets/_california_housing.py
+++ b/sklearn/datasets/_california_housing.py
@@ -35,7 +35,6 @@
 from ._base import _fetch_remote
 from ._base import _pkl_filepath
 from ._base import RemoteFileMetadata
-from ._base import _refresh_cache
 from ..utils import Bunch

 # The original data can be found at:
@@ -146,9 +145,7 @@ def fetch_california_housing(data_home=None, download_if_missing=True,
         remove(archive_path)

     else:
-        cal_housing = _refresh_cache([filepath], 6)
-        # TODO: Revert to the following line in v0.23
-        # cal_housing = joblib.load(filepath)
+        cal_housing = joblib.load(filepath)

     feature_names = ["MedInc", "HouseAge", "AveRooms", "AveBedrms",
                      "Population", "AveOccup", "Latitude", "Longitude"]
diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index f9fab853adc84..367ec1f9e2970 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -25,7 +25,6 @@
 from . import get_data_home
 from ._base import _fetch_remote
 from ._base import RemoteFileMetadata
-from ._base import _refresh_cache
 from ..utils import Bunch
 from ._base import _pkl_filepath
 from ..utils import check_random_state
@@ -126,10 +125,8 @@ def fetch_covtype(data_home=None, download_if_missing=True,
     try:
         X, y
     except NameError:
-        X, y = _refresh_cache([samples_path, targets_path], 9)
-        # TODO: Revert to the following two lines in v0.23
-        # X = joblib.load(samples_path)
-        # y = joblib.load(targets_path)
+        X = joblib.load(samples_path)
+        y = joblib.load(targets_path)

     if shuffle:
         ind = np.arange(X.shape[0])
diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py
index 0a8121521ac82..4585df8b0fb8b 100644
--- a/sklearn/datasets/_kddcup99.py
+++ b/sklearn/datasets/_kddcup99.py
@@ -20,7 +20,6 @@
 from ._base import _fetch_remote
 from . import get_data_home
 from ._base import RemoteFileMetadata
-from ._base import _refresh_cache
 from ..utils import Bunch
 from ..utils import check_random_state
 from ..utils import shuffle as shuffle_method
@@ -293,10 +292,8 @@ def _fetch_brute_kddcup99(data_home=None,
     try:
         X, y
     except NameError:
-        X, y = _refresh_cache([samples_path, targets_path], 0)
-        # TODO: Revert to the following two lines in v0.23
-        # X = joblib.load(samples_path)
-        # y = joblib.load(targets_path)
+        X = joblib.load(samples_path)
+        y = joblib.load(targets_path)

     return Bunch(data=X, target=y)

diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py
index f88f088e82e8b..d1a9805b495f2 100644
--- a/sklearn/datasets/_olivetti_faces.py
+++ b/sklearn/datasets/_olivetti_faces.py
@@ -24,7 +24,6 @@
 from ._base import _fetch_remote
 from ._base import RemoteFileMetadata
 from ._base import _pkl_filepath
-from ._base import _refresh_cache
 from ..utils import check_random_state, Bunch

 # The original data can be found at:
@@ -110,9 +109,7 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         joblib.dump(faces, filepath, compress=6)
         del mfile
     else:
-        faces = _refresh_cache([filepath], 6)
-        # TODO: Revert to the following line in v0.23
-        # faces = joblib.load(filepath)
+        faces = joblib.load(filepath)

     # We want floating point data, but float32 is enough (there is only
     # one byte of precision in the original uint8s anyway)
diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py
index 0836fe1249271..d930a347b7f7c 100644
--- a/sklearn/datasets/_rcv1.py
+++ b/sklearn/datasets/_rcv1.py
@@ -22,7 +22,6 @@
 from ._base import _pkl_filepath
 from ._base import _fetch_remote
 from ._base import RemoteFileMetadata
-from ._base import _refresh_cache
 from ._svmlight_format_io import load_svmlight_files
 from ..utils import shuffle as shuffle_
 from ..utils import Bunch
@@ -190,10 +189,8 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
             f.close()
             remove(f.name)
     else:
-        X, sample_id = _refresh_cache([samples_path, sample_id_path], 9)
-        # TODO: Revert to the following two lines in v0.23
-        # X = joblib.load(samples_path)
-        # sample_id = joblib.load(sample_id_path)
+        X = joblib.load(samples_path)
+        sample_id = joblib.load(sample_id_path)

     # load target (y), categories, and sample_id_bis
     if download_if_missing and (not exists(sample_topics_path) or
@@ -246,10 +243,8 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
         joblib.dump(y, sample_topics_path, compress=9)
         joblib.dump(categories, topics_path, compress=9)
     else:
-        y, categories = _refresh_cache([sample_topics_path, topics_path], 9)
-        # TODO: Revert to the following two lines in v0.23
-        # y = joblib.load(sample_topics_path)
-        # categories = joblib.load(topics_path)
+        y = joblib.load(sample_topics_path)
+        categories = joblib.load(topics_path)

     if subset == 'all':
         pass
diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py
index 99dc192af755b..7800dfce2c190 100644
--- a/sklearn/datasets/_species_distributions.py
+++ b/sklearn/datasets/_species_distributions.py
@@ -51,7 +51,6 @@
 from ._base import RemoteFileMetadata
 from ..utils import Bunch
 from ._base import _pkl_filepath
-from ._base import _refresh_cache

 # The original data can be found at:
 # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip
@@ -260,8 +259,6 @@ def fetch_species_distributions(data_home=None,
                      **extra_params)
         joblib.dump(bunch, archive_path, compress=9)
     else:
-        bunch = _refresh_cache([archive_path], 9)
-        # TODO: Revert to the following line in v0.23
-        # bunch = joblib.load(archive_path)
+        bunch = joblib.load(archive_path)

     return bunch
diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index 3a0ad41ced969..7f56217e93455 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -24,7 +24,6 @@
 from sklearn.datasets import load_boston
 from sklearn.datasets import load_wine
 from sklearn.utils import Bunch
-from sklearn.datasets._base import _refresh_cache
 from sklearn.datasets.tests.test_common import check_return_X_y

 from sklearn.externals._pilutil import pillow_installed
@@ -277,55 +276,3 @@ def test_bunch_dir():
     # check that dir (important for autocomplete) shows attributes
     data = load_iris()
     assert "data" in dir(data)
-
-
-def test_refresh_cache(monkeypatch):
-    # uses pytest's monkeypatch fixture
-    # https://docs.pytest.org/en/latest/monkeypatch.html
-
-    def _load_warn(*args, **kwargs):
-        # raise the warning from "externals.joblib.__init__.py"
-        # this is raised when a file persisted by the old joblib is loaded now
-        msg = ("sklearn.externals.joblib is deprecated in 0.21 and will be "
-               "removed in 0.23. Please import this functionality directly "
-               "from joblib, which can be installed with: pip install joblib. "
-               "If this warning is raised when loading pickled models, you "
-               "may need to re-serialize those models with scikit-learn "
-               "0.21+.")
-        warnings.warn(msg, FutureWarning)
-        return 0
-
-    def _load_warn_unrelated(*args, **kwargs):
-        warnings.warn("unrelated warning", FutureWarning)
-        return 0
-
-    def _dump_safe(*args, **kwargs):
-        pass
-
-    def _dump_raise(*args, **kwargs):
-        # this happens if the file is read-only and joblib.dump fails to write
-        # on it.
-        raise IOError()
-
-    # test if the dataset-specific warning is raised if load raises the joblib
-    # warning, and dump fails to dump with new joblib
-    monkeypatch.setattr(joblib, "load", _load_warn)
-    monkeypatch.setattr(joblib, "dump", _dump_raise)
-    msg = "This dataset will stop being loadable in scikit-learn"
-    with pytest.warns(FutureWarning, match=msg):
-        _refresh_cache('test', 0)
-
-    # make sure no warning is raised if load raises the warning, but dump
-    # manages to dump the new data
-    monkeypatch.setattr(joblib, "load", _load_warn)
-    monkeypatch.setattr(joblib, "dump", _dump_safe)
-    with pytest.warns(None) as warns:
-        _refresh_cache('test', 0)
-    assert len(warns) == 0
-
-    # test if an unrelated warning is still passed through and not suppressed
-    # by _refresh_cache
-    monkeypatch.setattr(joblib, "load", _load_warn_unrelated)
-    monkeypatch.setattr(joblib, "dump", _dump_safe)
-    with pytest.warns(FutureWarning, match="unrelated warning"):
-        _refresh_cache('test', 0)
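
Reviewer note: with the _refresh_cache shim removed, every fetcher falls back
to the plain dump-on-first-fetch / load-on-later-fetch pattern visible in the
hunks above. Below is a minimal, self-contained sketch of that pattern for
reference; fetch_demo_dataset, _download_payload, and CACHE_PATH are
hypothetical stand-ins for illustration, not scikit-learn API.

    import os

    import joblib

    CACHE_PATH = "/tmp/demo_dataset.pkz"  # hypothetical cache file


    def _download_payload():
        # stand-in for the download-and-parse step of a real fetcher
        return {"data": list(range(10))}


    def fetch_demo_dataset(download_if_missing=True):
        if not os.path.exists(CACHE_PATH):
            if not download_if_missing:
                raise IOError("Data not found and download_if_missing is False")
            payload = _download_payload()
            # first fetch: persist the parsed data with joblib, as the
            # fetchers above do (compress=6 here; the patch shows levels
            # 0, 6, and 9 in use depending on the dataset)
            joblib.dump(payload, CACHE_PATH, compress=6)
            return payload
        # later fetches: load the cache directly; after this patch there is
        # no _refresh_cache() re-serialization step in between
        return joblib.load(CACHE_PATH)

The compress levels are taken from the diff itself: fetch_california_housing
and fetch_olivetti_faces cache with compress=6, fetch_covtype, fetch_rcv1, and
fetch_species_distributions with compress=9, and the kddcup99 fetcher with
compress=0.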