From 9f6648905fbdc387d01fa8c029c23616798cedd2 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 31 Jan 2020 16:56:05 +0100 Subject: [PATCH 01/15] download and test rcv1 in cron job --- .travis.yml | 2 +- build_tools/travis/test_script.sh | 7 +++---- sklearn/datasets/tests/test_rcv1.py | 8 +++++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9fda90f71a7c0..03d0d4b5b638b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ -# make it explicit that we favor the new container-based travis workers + # make it explicit that we favor the new container-based travis workers language: python cache: diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index f13e0f1bbb2fa..a9c8fb73f9552 100755 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -30,10 +30,9 @@ run_tests() { cp setup.cfg $TEST_DIR cd $TEST_DIR - # Skip tests that require large downloads over the network to save bandwidth - # usage as travis workers are stateless and therefore traditional local - # disk caching does not work. - export SKLEARN_SKIP_NETWORK_TESTS=1 + # Tests that require large downloads over the networks are skipped in CI. + # Here we make sure, that they are still run on a regular basis. + export SKLEARN_SKIP_NETWORK_TESTS=0 if [[ "$COVERAGE" == "true" ]]; then TEST_CMD="$TEST_CMD --cov sklearn" diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index 7cae454bf158b..3738f2ba4ff87 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -4,6 +4,7 @@ """ import errno +import os import scipy.sparse as sp import numpy as np from functools import partial @@ -15,8 +16,13 @@ def test_fetch_rcv1(): + # Do not download data, unless explicitly requested via environment var + download_if_missing = False + if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: + download_if_missing = True try: - data1 = fetch_rcv1(shuffle=False, download_if_missing=False) + data1 = fetch_rcv1(shuffle=False, + download_if_missing=download_if_missing) except IOError as e: if e.errno == errno.ENOENT: raise SkipTest("Download RCV1 dataset to run this test.") From c9d26fd63bcb2a38bb0eeae95d0e3c51df60430d Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 31 Jan 2020 17:01:09 +0100 Subject: [PATCH 02/15] download and test 20news in cron job [scipy-dev] --- sklearn/datasets/tests/test_20news.py | 12 ++++++++++-- sklearn/datasets/tests/test_rcv1.py | 7 +++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 15cb49c44b0e5..72b5dad97d7fe 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -1,5 +1,8 @@ -"""Test the 20news downloader, if the data is available.""" +"""Test the 20news downloader, if the data is available, +or if specifically requested via environment variable +(e.g. 
for travis cron job).""" from functools import partial +import os import numpy as np import scipy.sparse as sp @@ -12,9 +15,14 @@ def test_20news(): + # Do not download data, unless explicitly requested via environment var + download_if_missing = False + if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: + download_if_missing = True try: data = datasets.fetch_20newsgroups( - subset='all', download_if_missing=False, shuffle=False) + subset='all', download_if_missing=download_if_missing, + shuffle=False) except IOError: raise SkipTest("Download 20 newsgroups to run this test") diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index 3738f2ba4ff87..f733a8879be23 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -1,7 +1,6 @@ -"""Test the rcv1 loader. - -Skipped if rcv1 is not already downloaded to data_home. -""" +"""Test the rcv1 loader, if the data is available, +or if specifically requested via environment variable +(e.g. for travis cron job).""" import errno import os From 47072fb55a914728ce1005e7126020d50a7a496f Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 31 Jan 2020 17:05:02 +0100 Subject: [PATCH 03/15] fix typo [scipy-dev] --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 03d0d4b5b638b..9fda90f71a7c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ - # make it explicit that we favor the new container-based travis workers +# make it explicit that we favor the new container-based travis workers language: python cache: From dff956aff9c091c425ea57cf9ef8757bf5796e06 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 31 Jan 2020 17:30:01 +0100 Subject: [PATCH 04/15] california_housing, covtype, kddcup99, olivetti_faces [scipy-dev] --- sklearn/datasets/tests/test_california_housing.py | 14 +++++++++----- sklearn/datasets/tests/test_covtype.py | 15 ++++++++++----- sklearn/datasets/tests/test_kddcup99.py | 15 ++++++++++----- sklearn/datasets/tests/test_olivetti_faces.py | 12 ++++++++++-- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index 56cd62ef8bc35..1b7d06922608f 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -1,8 +1,8 @@ -"""Test the california_housing loader. - -Skipped if california_housing is not already downloaded to data_home. -""" +"""Test the california_housing loader, if the data is available, +or if specifically requested via environment variable +(e.g. for travis cron job).""" +import os import pytest from sklearn.datasets import fetch_california_housing @@ -15,8 +15,12 @@ def fetch(*args, **kwargs): def _is_california_housing_dataset_not_available(): + # Do not download data, unless explicitly requested via environment var + download_if_missing = False + if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: + download_if_missing = True try: - fetch_california_housing(download_if_missing=False) + fetch_california_housing(download_if_missing=download_if_missing) return False except IOError: return True diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 1127b8114c5e7..efb195d38fe66 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -1,8 +1,8 @@ -"""Test the covtype loader. 
- -Skipped if covtype is not already downloaded to data_home. -""" +"""Test the covtype loader, if the data is available, +or if specifically requested via environment variable +(e.g. for travis cron job).""" +import os from sklearn.datasets import fetch_covtype from sklearn.utils._testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y @@ -10,7 +10,12 @@ def fetch(*args, **kwargs): - return fetch_covtype(*args, download_if_missing=False, **kwargs) + # Do not download data, unless explicitly requested via environment var + download_if_missing = False + if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: + download_if_missing = True + return fetch_covtype(*args, download_if_missing=download_if_missing, + **kwargs) def test_fetch(): diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 6d371e5a8e6f0..d367eac4eadf5 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -1,18 +1,23 @@ -"""Test kddcup99 loader. Only 'percent10' mode is tested, as the full data -is too big to use in unit-testing. +"""Test kddcup99 loader, if the data is available, +or if specifically requested via environment variable +(e.g. for travis cron job). -The test is skipped if the data wasn't previously fetched and saved to -scikit-learn data folder. +Only 'percent10' mode is tested, as the full data +is too big to use in unit-testing. """ +import os from sklearn.datasets import fetch_kddcup99 from sklearn.datasets.tests.test_common import check_return_X_y from sklearn.utils._testing import SkipTest from functools import partial - def test_percent10(): + # Do not download data, unless explicitly requested via environment var + download_if_missing = False + if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: + download_if_missing = True try: data = fetch_kddcup99(download_if_missing=False) except IOError: diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index 0162676c50af7..5e91df96128c6 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -1,4 +1,8 @@ -"""Test Olivetti faces fetcher, if the data is available.""" +"""Test Olivetti faces fetcher, if the data is available, +or if specifically requested via environment variable +(e.g. 
for travis cron job).""" + +import os import pytest import numpy as np @@ -10,8 +14,12 @@ def _is_olivetti_faces_not_available(): + # Do not download data, unless explicitly requested via environment var + download_if_missing = False + if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: + download_if_missing = True try: - datasets.fetch_olivetti_faces(download_if_missing=False) + datasets.fetch_olivetti_faces(download_if_missing=download_if_missing) return False except IOError: return True From a1ad4df0fa670b7b2577bd4802780e27a9575e68 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 31 Jan 2020 17:36:21 +0100 Subject: [PATCH 05/15] fix kddcup99 [scipy-dev] --- sklearn/datasets/tests/test_kddcup99.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index d367eac4eadf5..01931ba27ed84 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -19,7 +19,7 @@ def test_percent10(): if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: download_if_missing = True try: - data = fetch_kddcup99(download_if_missing=False) + data = fetch_kddcup99(download_if_missing=download_if_missing) except IOError: raise SkipTest("kddcup99 dataset can not be loaded.") From 99c02f2ef80e5d6c8f240a8df68aac76ea1e2be1 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 14 Feb 2020 13:01:26 +0100 Subject: [PATCH 06/15] fetch datasets in wrapper func [scipy-dev] --- sklearn/datasets/tests/test_20news.py | 69 +++++++++---------- .../datasets/tests/test_california_housing.py | 10 ++- sklearn/datasets/tests/test_covtype.py | 23 +++---- sklearn/datasets/tests/test_kddcup99.py | 43 ++++++------ sklearn/datasets/tests/test_olivetti_faces.py | 6 +- sklearn/datasets/tests/test_rcv1.py | 30 ++++---- 6 files changed, 88 insertions(+), 93 deletions(-) diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 72b5dad97d7fe..76453d7fbc636 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -2,32 +2,40 @@ or if specifically requested via environment variable (e.g. 
for travis cron job).""" from functools import partial -import os +from os import environ +import pytest import numpy as np import scipy.sparse as sp -from sklearn.utils._testing import SkipTest, assert_allclose_dense_sparse +from sklearn.utils._testing import assert_allclose_dense_sparse from sklearn.datasets.tests.test_common import check_return_X_y from sklearn import datasets from sklearn.preprocessing import normalize -def test_20news(): +def _fetch_20newsgroups(vectorized=False, *args, **kwargs): # Do not download data, unless explicitly requested via environment var - download_if_missing = False - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: - download_if_missing = True + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' try: - data = datasets.fetch_20newsgroups( - subset='all', download_if_missing=download_if_missing, - shuffle=False) + if vectorized: + return datasets.fetch_20newsgroups_vectorized( + *args, download_if_missing=download_if_missing, **kwargs) + else: + return datasets.fetch_20newsgroups( + *args, download_if_missing=download_if_missing, **kwargs) except IOError: - raise SkipTest("Download 20 newsgroups to run this test") + return None + + +@pytest.mark.skipif(_fetch_20newsgroups() is None, + reason="Download 20 newsgroups to run this test") +def test_20news(): + data = _fetch_20newsgroups(subset='all', shuffle=False) # Extract a reduced dataset - data2cats = datasets.fetch_20newsgroups( + data2cats = _fetch_20newsgroups( subset='all', categories=data.target_names[-1:-3:-1], shuffle=False) # Check that the ordering of the target_names is the same # as the ordering in the full dataset @@ -48,72 +56,61 @@ def test_20news(): assert entry1 == entry2 # check that return_X_y option - X, y = datasets.fetch_20newsgroups( + X, y = _fetch_20newsgroups( subset='all', shuffle=False, return_X_y=True ) assert len(X) == len(data.data) assert y.shape == data.target.shape +@pytest.mark.skipif(_fetch_20newsgroups() is None, + reason="Download 20 newsgroups to run this test") def test_20news_length_consistency(): """Checks the length consistencies within the bunch This is a non-regression test for a bug present in 0.16.1. 
""" - try: - data = datasets.fetch_20newsgroups( - subset='all', download_if_missing=False, shuffle=False) - except IOError: - raise SkipTest("Download 20 newsgroups to run this test") # Extract the full dataset - data = datasets.fetch_20newsgroups(subset='all') + data = _fetch_20newsgroups(subset='all') assert len(data['data']) == len(data.data) assert len(data['target']) == len(data.target) assert len(data['filenames']) == len(data.filenames) +@pytest.mark.skipif(_fetch_20newsgroups(vectorized=True) is None, + reason="Download 20 news vectorized to run this test") def test_20news_vectorized(): - try: - datasets.fetch_20newsgroups(subset='all', - download_if_missing=False) - except IOError: - raise SkipTest("Download 20 newsgroups to run this test") - # test subset = train - bunch = datasets.fetch_20newsgroups_vectorized(subset="train") + bunch = _fetch_20newsgroups(vectorized=True, subset="train") assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (11314, 130107) assert bunch.target.shape[0] == 11314 assert bunch.data.dtype == np.float64 # test subset = test - bunch = datasets.fetch_20newsgroups_vectorized(subset="test") + bunch = _fetch_20newsgroups(vectorized=True, subset="test") assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (7532, 130107) assert bunch.target.shape[0] == 7532 assert bunch.data.dtype == np.float64 # test return_X_y option - fetch_func = partial(datasets.fetch_20newsgroups_vectorized, subset='test') + fetch_func = partial(_fetch_20newsgroups, vectorized=True, subset='test') check_return_X_y(bunch, fetch_func) # test subset = all - bunch = datasets.fetch_20newsgroups_vectorized(subset='all') + bunch = _fetch_20newsgroups(vectorized=True, subset='all') assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (11314 + 7532, 130107) assert bunch.target.shape[0] == 11314 + 7532 assert bunch.data.dtype == np.float64 +@pytest.mark.skipif(_fetch_20newsgroups(vectorized=True) is None, + reason="Download 20 news vectorized to run this test") def test_20news_normalization(): - try: - X = datasets.fetch_20newsgroups_vectorized(normalize=False, - download_if_missing=False) - X_ = datasets.fetch_20newsgroups_vectorized(normalize=True, - download_if_missing=False) - except IOError: - raise SkipTest("Download 20 newsgroups to run this test") - + X = _fetch_20newsgroups(vectorized=True, normalize=False) + X_ = _fetch_20newsgroups(vectorized=True, normalize=True) X_norm = X_['data'][:100] X = X['data'][:100] diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index 1b7d06922608f..482492316cf68 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -2,7 +2,7 @@ or if specifically requested via environment variable (e.g. 
for travis cron job).""" -import os +from os import environ import pytest from sklearn.datasets import fetch_california_housing @@ -11,16 +11,14 @@ def fetch(*args, **kwargs): - return fetch_california_housing(*args, download_if_missing=False, **kwargs) + return fetch_california_housing(*args, **kwargs) def _is_california_housing_dataset_not_available(): # Do not download data, unless explicitly requested via environment var - download_if_missing = False - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: - download_if_missing = True + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' try: - fetch_california_housing(download_if_missing=download_if_missing) + fetch(download_if_missing=download_if_missing) return False except IOError: return True diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index efb195d38fe66..386445807caac 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -2,28 +2,27 @@ or if specifically requested via environment variable (e.g. for travis cron job).""" -import os +from os import environ +import pytest from sklearn.datasets import fetch_covtype -from sklearn.utils._testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial def fetch(*args, **kwargs): # Do not download data, unless explicitly requested via environment var - download_if_missing = False - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: - download_if_missing = True - return fetch_covtype(*args, download_if_missing=download_if_missing, - **kwargs) - - -def test_fetch(): + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' try: - data1 = fetch(shuffle=True, random_state=42) + return fetch_covtype(*args, download_if_missing=download_if_missing, + **kwargs) except IOError: - raise SkipTest("Covertype dataset can not be loaded.") + return None + +@pytest.mark.skipif(fetch() is None, + reason="Download covtype to run this test") +def test_fetch(): + data1 = fetch(shuffle=True, random_state=42) data2 = fetch(shuffle=True, random_state=37) X1, X2 = data1['data'], data2['data'] diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 01931ba27ed84..74668413efbf3 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -6,55 +6,58 @@ is too big to use in unit-testing. 
""" -import os +from os import environ +import pytest from sklearn.datasets import fetch_kddcup99 from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils._testing import SkipTest from functools import partial -def test_percent10(): +def _fetch_dataset(*args, **kwargs): # Do not download data, unless explicitly requested via environment var - download_if_missing = False - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: - download_if_missing = True + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' try: - data = fetch_kddcup99(download_if_missing=download_if_missing) + return fetch_kddcup99(*args, download_if_missing=download_if_missing, + **kwargs) except IOError: - raise SkipTest("kddcup99 dataset can not be loaded.") + return None + + +@pytest.mark.skipif(_fetch_dataset() is None, + reason="Download kddcup99 to run this test") +def test_percent10(): + data = _fetch_dataset() assert data.data.shape == (494021, 41) assert data.target.shape == (494021,) - data_shuffled = fetch_kddcup99(shuffle=True, random_state=0) + data_shuffled = _fetch_dataset(shuffle=True, random_state=0) assert data.data.shape == data_shuffled.data.shape assert data.target.shape == data_shuffled.target.shape - data = fetch_kddcup99('SA') + data = _fetch_dataset('SA') assert data.data.shape == (100655, 41) assert data.target.shape == (100655,) - data = fetch_kddcup99('SF') + data = _fetch_dataset('SF') assert data.data.shape == (73237, 4) assert data.target.shape == (73237,) - data = fetch_kddcup99('http') + data = _fetch_dataset('http') assert data.data.shape == (58725, 3) assert data.target.shape == (58725,) - data = fetch_kddcup99('smtp') + data = _fetch_dataset('smtp') assert data.data.shape == (9571, 3) assert data.target.shape == (9571,) - fetch_func = partial(fetch_kddcup99, 'smtp') + fetch_func = partial(_fetch_dataset, 'smtp') check_return_X_y(data, fetch_func) +@pytest.mark.skipif(_fetch_dataset() is None, + reason="Download kddcup99 to run this test") def test_shuffle(): - try: - dataset = fetch_kddcup99(random_state=0, subset='SA', shuffle=True, - percent10=True, download_if_missing=False) - except IOError: - raise SkipTest("kddcup99 dataset can not be loaded.") - + dataset = _fetch_dataset(random_state=0, subset='SA', shuffle=True, + percent10=True) assert(any(dataset.target[-100:] == b'normal.')) diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index 5e91df96128c6..2d7e53815eef1 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -2,7 +2,7 @@ or if specifically requested via environment variable (e.g. for travis cron job).""" -import os +from os import environ import pytest import numpy as np @@ -15,9 +15,7 @@ def _is_olivetti_faces_not_available(): # Do not download data, unless explicitly requested via environment var - download_if_missing = False - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: - download_if_missing = True + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' try: datasets.fetch_olivetti_faces(download_if_missing=download_if_missing) return False diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index f733a8879be23..ad060dc22e4ae 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -2,8 +2,8 @@ or if specifically requested via environment variable (e.g. 
for travis cron job).""" -import errno -import os +from os import environ +import pytest import scipy.sparse as sp import numpy as np from functools import partial @@ -11,21 +11,22 @@ from sklearn.datasets.tests.test_common import check_return_X_y from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import SkipTest -def test_fetch_rcv1(): +def _fetch_data(*args, **kwargs): # Do not download data, unless explicitly requested via environment var - download_if_missing = False - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 1)) == 0: - download_if_missing = True + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' try: - data1 = fetch_rcv1(shuffle=False, - download_if_missing=download_if_missing) - except IOError as e: - if e.errno == errno.ENOENT: - raise SkipTest("Download RCV1 dataset to run this test.") + return fetch_rcv1(*args, download_if_missing=download_if_missing, + **kwargs) + except IOError: + return None + +@pytest.mark.skipif(_fetch_data() is None, + reason="Download RCV1 to run this test") +def test_fetch_rcv1(): + data1 = _fetch_data(shuffle=False) X1, Y1 = data1.data, data1.target cat_list, s1 = data1.target_names.tolist(), data1.sample_id @@ -53,13 +54,12 @@ def test_fetch_rcv1(): assert num == Y1[:, j].data.size # test shuffling and subset - data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77, - download_if_missing=False) + data2 = _fetch_data(shuffle=True, subset='train', random_state=77) X2, Y2 = data2.data, data2.target s2 = data2.sample_id # test return_X_y option - fetch_func = partial(fetch_rcv1, shuffle=False, subset='train', + fetch_func = partial(_fetch_data, shuffle=False, subset='train', download_if_missing=False) check_return_X_y(data2, fetch_func) From 4d3499c55d140d2b55ef9208f8b9ab39acc632ef Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 14 Feb 2020 13:31:45 +0100 Subject: [PATCH 07/15] fix rcv1 test [scipy-dev] --- sklearn/datasets/tests/test_rcv1.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index ad060dc22e4ae..bb93051029192 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -59,8 +59,7 @@ def test_fetch_rcv1(): s2 = data2.sample_id # test return_X_y option - fetch_func = partial(_fetch_data, shuffle=False, subset='train', - download_if_missing=False) + fetch_func = partial(_fetch_data, shuffle=False, subset='train') check_return_X_y(data2, fetch_func) # The first 23149 samples are the training samples From ec4db968dc9a05afd7dcc78f55bd37af9010bf54 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Fri, 14 Feb 2020 16:37:35 +0100 Subject: [PATCH 08/15] do not skip test_pandas_dependency_message [scipy-dev] --- .../datasets/tests/test_california_housing.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index 482492316cf68..614c00c54b831 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -2,6 +2,7 @@ or if specifically requested via environment variable (e.g. 
for travis cron job).""" +import builtins from os import environ import pytest @@ -52,18 +53,28 @@ def test_fetch_asframe(): assert isinstance(bunch.target, pd.DataFrame) +@pytest.fixture +def hide_available_pandas(monkeypatch): + """ Pretend pandas was not installed. """ + import_orig = builtins.__import__ + + def mocked_import(name, *args, **kwargs): + if name == 'pandas': + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, '__import__', mocked_import) + + @pytest.mark.skipif( _is_california_housing_dataset_not_available(), reason='Download California Housing dataset to run this test' ) +@pytest.mark.usefixtures('hide_available_pandas') def test_pandas_dependency_message(): - try: - import pandas # noqa - pytest.skip("This test requires pandas to be not installed") - except ImportError: - # Check that pandas is imported lazily and that an informative error - # message is raised when pandas is missing: - expected_msg = ('fetch_california_housing with as_frame=True' - ' requires pandas') - with pytest.raises(ImportError, match=expected_msg): - fetch_california_housing(as_frame=True) + # Check that pandas is imported lazily and that an informative error + # message is raised when pandas is missing: + expected_msg = ('fetch_california_housing with as_frame=True' + ' requires pandas') + with pytest.raises(ImportError, match=expected_msg): + fetch_california_housing(as_frame=True) From e7081fa1b1db759589d08cdd297d033d676eed28 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Mon, 17 Feb 2020 14:43:33 +0100 Subject: [PATCH 09/15] introduce fetch fixtures [scipy-dev] --- sklearn/datasets/tests/conftest.py | 78 +++++++++++++++++++ sklearn/datasets/tests/test_20news.py | 56 ++++--------- sklearn/datasets/tests/test_base.py | 1 + .../datasets/tests/test_california_housing.py | 55 ++----------- sklearn/datasets/tests/test_covtype.py | 23 +----- sklearn/datasets/tests/test_kddcup99.py | 37 +++------ sklearn/datasets/tests/test_olivetti_faces.py | 23 +----- sklearn/datasets/tests/test_rcv1.py | 23 +----- 8 files changed, 120 insertions(+), 176 deletions(-) create mode 100644 sklearn/datasets/tests/conftest.py diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py new file mode 100644 index 0000000000000..fddc22dd80c8f --- /dev/null +++ b/sklearn/datasets/tests/conftest.py @@ -0,0 +1,78 @@ +""" Network tests are only run, if data is already locally available, +or if download is specifically requested by environment variable.""" +import builtins +from os import environ +import pytest +from sklearn.datasets import ( + fetch_20newsgroups as _fetch_20newsgroups, + fetch_20newsgroups_vectorized as _fetch_20newsgroups_vectorized, + fetch_california_housing as _fetch_california_housing, + fetch_covtype as _fetch_covtype, + fetch_kddcup99 as _fetch_kddcup99, + fetch_olivetti_faces as _fetch_olivetti_faces, + fetch_rcv1 as _fetch_rcv1, +) + + +def _wrapped_fetch(f, dataset_name): + """ Fetch dataset (download if missing and requested by environment) """ + download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' + + def wrapped(*args, **kwargs): + kwargs['download_if_missing'] = download_if_missing + try: + return f(*args, **kwargs) + except IOError: + pytest.skip("Download {} to run this test".format(dataset_name)) + + return wrapped + + +@pytest.fixture +def fetch_20newsgroups(): + return _wrapped_fetch(_fetch_20newsgroups, dataset_name='20newsgroups') + + +@pytest.fixture +def fetch_20newsgroups_vectorized(): + 
return _wrapped_fetch(_fetch_20newsgroups_vectorized, + dataset_name='20newsgroups_vectorized') + + +@pytest.fixture +def fetch_california_housing(): + return _wrapped_fetch(_fetch_california_housing, + dataset_name='california_housing') + + +@pytest.fixture +def fetch_covtype(): + return _wrapped_fetch(_fetch_covtype, dataset_name='covtype') + + +@pytest.fixture +def fetch_kddcup99(): + return _wrapped_fetch(_fetch_kddcup99, dataset_name='kddcup99') + + +@pytest.fixture +def fetch_olivetti_faces(): + return _wrapped_fetch(_fetch_olivetti_faces, dataset_name='olivetti_faces') + + +@pytest.fixture +def fetch_rcv1(): + return _wrapped_fetch(_fetch_rcv1, dataset_name='rcv1') + + +@pytest.fixture +def hide_available_pandas(monkeypatch): + """ Pretend pandas was not installed. """ + import_orig = builtins.__import__ + + def mocked_import(name, *args, **kwargs): + if name == 'pandas': + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, '__import__', mocked_import) diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 76453d7fbc636..d56d05579e311 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -2,40 +2,20 @@ or if specifically requested via environment variable (e.g. for travis cron job).""" from functools import partial -from os import environ -import pytest import numpy as np import scipy.sparse as sp from sklearn.utils._testing import assert_allclose_dense_sparse from sklearn.datasets.tests.test_common import check_return_X_y - -from sklearn import datasets from sklearn.preprocessing import normalize -def _fetch_20newsgroups(vectorized=False, *args, **kwargs): - # Do not download data, unless explicitly requested via environment var - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - try: - if vectorized: - return datasets.fetch_20newsgroups_vectorized( - *args, download_if_missing=download_if_missing, **kwargs) - else: - return datasets.fetch_20newsgroups( - *args, download_if_missing=download_if_missing, **kwargs) - except IOError: - return None - - -@pytest.mark.skipif(_fetch_20newsgroups() is None, - reason="Download 20 newsgroups to run this test") -def test_20news(): - data = _fetch_20newsgroups(subset='all', shuffle=False) +def test_20news(fetch_20newsgroups): + data = fetch_20newsgroups(subset='all', shuffle=False) # Extract a reduced dataset - data2cats = _fetch_20newsgroups( + data2cats = fetch_20newsgroups( subset='all', categories=data.target_names[-1:-3:-1], shuffle=False) # Check that the ordering of the target_names is the same # as the ordering in the full dataset @@ -56,61 +36,53 @@ def test_20news(): assert entry1 == entry2 # check that return_X_y option - X, y = _fetch_20newsgroups( - subset='all', shuffle=False, return_X_y=True - ) + X, y = fetch_20newsgroups(subset='all', shuffle=False, return_X_y=True) assert len(X) == len(data.data) assert y.shape == data.target.shape -@pytest.mark.skipif(_fetch_20newsgroups() is None, - reason="Download 20 newsgroups to run this test") -def test_20news_length_consistency(): +def test_20news_length_consistency(fetch_20newsgroups): """Checks the length consistencies within the bunch This is a non-regression test for a bug present in 0.16.1. 
""" # Extract the full dataset - data = _fetch_20newsgroups(subset='all') + data = fetch_20newsgroups(subset='all') assert len(data['data']) == len(data.data) assert len(data['target']) == len(data.target) assert len(data['filenames']) == len(data.filenames) -@pytest.mark.skipif(_fetch_20newsgroups(vectorized=True) is None, - reason="Download 20 news vectorized to run this test") -def test_20news_vectorized(): +def test_20news_vectorized(fetch_20newsgroups_vectorized): # test subset = train - bunch = _fetch_20newsgroups(vectorized=True, subset="train") + bunch = fetch_20newsgroups_vectorized(subset="train") assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (11314, 130107) assert bunch.target.shape[0] == 11314 assert bunch.data.dtype == np.float64 # test subset = test - bunch = _fetch_20newsgroups(vectorized=True, subset="test") + bunch = fetch_20newsgroups_vectorized(subset="test") assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (7532, 130107) assert bunch.target.shape[0] == 7532 assert bunch.data.dtype == np.float64 # test return_X_y option - fetch_func = partial(_fetch_20newsgroups, vectorized=True, subset='test') + fetch_func = partial(fetch_20newsgroups_vectorized, subset='test') check_return_X_y(bunch, fetch_func) # test subset = all - bunch = _fetch_20newsgroups(vectorized=True, subset='all') + bunch = fetch_20newsgroups_vectorized(subset='all') assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (11314 + 7532, 130107) assert bunch.target.shape[0] == 11314 + 7532 assert bunch.data.dtype == np.float64 -@pytest.mark.skipif(_fetch_20newsgroups(vectorized=True) is None, - reason="Download 20 news vectorized to run this test") -def test_20news_normalization(): - X = _fetch_20newsgroups(vectorized=True, normalize=False) - X_ = _fetch_20newsgroups(vectorized=True, normalize=True) +def test_20news_normalization(fetch_20newsgroups_vectorized): + X = fetch_20newsgroups_vectorized(normalize=False) + X_ = fetch_20newsgroups_vectorized(normalize=True) X_norm = X_['data'][:100] X = X['data'][:100] diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index a58bdc9ed644d..f3e7769d0d0f3 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -257,6 +257,7 @@ def test_toy_dataset_as_frame(loader_func, data_dtype, target_dtype): load_linnerud, load_wine, ]) +@pytest.mark.usefixtures('hide_available_pandas') def test_toy_dataset_as_frame_no_pandas(loader_func): check_pandas_dependency_message(loader_func) diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index 5400cab210070..ced4a20fc902d 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -1,51 +1,25 @@ """Test the california_housing loader, if the data is available, or if specifically requested via environment variable (e.g. 
for travis cron job).""" - -import builtins -from os import environ import pytest -from sklearn.datasets import fetch_california_housing from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial -def fetch(*args, **kwargs): - return fetch_california_housing(*args, **kwargs) - - -def _is_california_housing_dataset_not_available(): - # Do not download data, unless explicitly requested via environment var - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - try: - fetch(download_if_missing=download_if_missing) - return False - except IOError: - return True - - -@pytest.mark.skipif( - _is_california_housing_dataset_not_available(), - reason='Download California Housing dataset to run this test' -) -def test_fetch(): - data = fetch() +def test_fetch(fetch_california_housing): + data = fetch_california_housing() assert((20640, 8) == data.data.shape) assert((20640, ) == data.target.shape) # test return_X_y option - fetch_func = partial(fetch) + fetch_func = partial(fetch_california_housing) check_return_X_y(data, fetch_func) -@pytest.mark.skipif( - _is_california_housing_dataset_not_available(), - reason='Download California Housing dataset to run this test' -) -def test_fetch_asframe(): +def test_fetch_asframe(fetch_california_housing): pd = pytest.importorskip('pandas') - bunch = fetch(as_frame=True) + bunch = fetch_california_housing(as_frame=True) frame = bunch.frame assert hasattr(bunch, 'frame') is True assert frame.shape == (20640, 9) @@ -53,25 +27,8 @@ def test_fetch_asframe(): assert isinstance(bunch.target, pd.Series) -@pytest.fixture -def hide_available_pandas(monkeypatch): - """ Pretend pandas was not installed. """ - import_orig = builtins.__import__ - - def mocked_import(name, *args, **kwargs): - if name == 'pandas': - raise ImportError() - return import_orig(name, *args, **kwargs) - - monkeypatch.setattr(builtins, '__import__', mocked_import) - - -@pytest.mark.skipif( - _is_california_housing_dataset_not_available(), - reason='Download California Housing dataset to run this test' -) @pytest.mark.usefixtures('hide_available_pandas') -def test_pandas_dependency_message(): +def test_pandas_dependency_message(fetch_california_housing): # Check that pandas is imported lazily and that an informative error # message is raised when pandas is missing: expected_msg = ('fetch_california_housing with as_frame=True' diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 386445807caac..1b127b0100f1f 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -2,28 +2,13 @@ or if specifically requested via environment variable (e.g. 
for travis cron job).""" -from os import environ -import pytest -from sklearn.datasets import fetch_covtype from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial -def fetch(*args, **kwargs): - # Do not download data, unless explicitly requested via environment var - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - try: - return fetch_covtype(*args, download_if_missing=download_if_missing, - **kwargs) - except IOError: - return None - - -@pytest.mark.skipif(fetch() is None, - reason="Download covtype to run this test") -def test_fetch(): - data1 = fetch(shuffle=True, random_state=42) - data2 = fetch(shuffle=True, random_state=37) +def test_fetch(fetch_covtype): + data1 = fetch_covtype(shuffle=True, random_state=42) + data2 = fetch_covtype(shuffle=True, random_state=37) X1, X2 = data1['data'], data2['data'] assert (581012, 54) == X1.shape @@ -36,5 +21,5 @@ def test_fetch(): assert (X1.shape[0],) == y2.shape # test return_X_y option - fetch_func = partial(fetch) + fetch_func = partial(fetch_covtype) check_return_X_y(data1, fetch_func) diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 74668413efbf3..f0827290ce2e8 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -6,58 +6,41 @@ is too big to use in unit-testing. """ -from os import environ -import pytest -from sklearn.datasets import fetch_kddcup99 from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial -def _fetch_dataset(*args, **kwargs): - # Do not download data, unless explicitly requested via environment var - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - try: - return fetch_kddcup99(*args, download_if_missing=download_if_missing, - **kwargs) - except IOError: - return None - - -@pytest.mark.skipif(_fetch_dataset() is None, - reason="Download kddcup99 to run this test") -def test_percent10(): - data = _fetch_dataset() +def test_percent10(fetch_kddcup99): + data = fetch_kddcup99() assert data.data.shape == (494021, 41) assert data.target.shape == (494021,) - data_shuffled = _fetch_dataset(shuffle=True, random_state=0) + data_shuffled = fetch_kddcup99(shuffle=True, random_state=0) assert data.data.shape == data_shuffled.data.shape assert data.target.shape == data_shuffled.target.shape - data = _fetch_dataset('SA') + data = fetch_kddcup99('SA') assert data.data.shape == (100655, 41) assert data.target.shape == (100655,) - data = _fetch_dataset('SF') + data = fetch_kddcup99('SF') assert data.data.shape == (73237, 4) assert data.target.shape == (73237,) - data = _fetch_dataset('http') + data = fetch_kddcup99('http') assert data.data.shape == (58725, 3) assert data.target.shape == (58725,) - data = _fetch_dataset('smtp') + data = fetch_kddcup99('smtp') assert data.data.shape == (9571, 3) assert data.target.shape == (9571,) - fetch_func = partial(_fetch_dataset, 'smtp') + fetch_func = partial(fetch_kddcup99, 'smtp') check_return_X_y(data, fetch_func) -@pytest.mark.skipif(_fetch_dataset() is None, - reason="Download kddcup99 to run this test") -def test_shuffle(): - dataset = _fetch_dataset(random_state=0, subset='SA', shuffle=True, +def test_shuffle(fetch_kddcup99): + dataset = fetch_kddcup99(random_state=0, subset='SA', shuffle=True, percent10=True) assert(any(dataset.target[-100:] == b'normal.')) diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index 
2d7e53815eef1..cb76b1f1e87a8 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -2,33 +2,16 @@ or if specifically requested via environment variable (e.g. for travis cron job).""" -from os import environ -import pytest import numpy as np -from sklearn import datasets from sklearn.utils import Bunch from sklearn.datasets.tests.test_common import check_return_X_y from sklearn.utils._testing import assert_array_equal -def _is_olivetti_faces_not_available(): - # Do not download data, unless explicitly requested via environment var - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - try: - datasets.fetch_olivetti_faces(download_if_missing=download_if_missing) - return False - except IOError: - return True - - -@pytest.mark.skipif( - _is_olivetti_faces_not_available(), - reason='Download Olivetti faces dataset to run this test' -) -def test_olivetti_faces(): - data = datasets.fetch_olivetti_faces(shuffle=True, random_state=0) +def test_olivetti_faces(fetch_olivetti_faces): + data = fetch_olivetti_faces(shuffle=True, random_state=0) assert isinstance(data, Bunch) for expected_keys in ('data', 'images', 'target', 'DESCR'): @@ -40,4 +23,4 @@ def test_olivetti_faces(): assert_array_equal(np.unique(np.sort(data.target)), np.arange(40)) # test the return_X_y option - check_return_X_y(data, datasets.fetch_olivetti_faces) + check_return_X_y(data, fetch_olivetti_faces) diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index bb93051029192..f7ecb0e8c7199 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -2,31 +2,16 @@ or if specifically requested via environment variable (e.g. for travis cron job).""" -from os import environ -import pytest import scipy.sparse as sp import numpy as np from functools import partial -from sklearn.datasets import fetch_rcv1 from sklearn.datasets.tests.test_common import check_return_X_y from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal -def _fetch_data(*args, **kwargs): - # Do not download data, unless explicitly requested via environment var - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - try: - return fetch_rcv1(*args, download_if_missing=download_if_missing, - **kwargs) - except IOError: - return None - - -@pytest.mark.skipif(_fetch_data() is None, - reason="Download RCV1 to run this test") -def test_fetch_rcv1(): - data1 = _fetch_data(shuffle=False) +def test_fetch_rcv1(fetch_rcv1): + data1 = fetch_rcv1(shuffle=False) X1, Y1 = data1.data, data1.target cat_list, s1 = data1.target_names.tolist(), data1.sample_id @@ -54,12 +39,12 @@ def test_fetch_rcv1(): assert num == Y1[:, j].data.size # test shuffling and subset - data2 = _fetch_data(shuffle=True, subset='train', random_state=77) + data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77) X2, Y2 = data2.data, data2.target s2 = data2.sample_id # test return_X_y option - fetch_func = partial(_fetch_data, shuffle=False, subset='train') + fetch_func = partial(fetch_rcv1, shuffle=False, subset='train') check_return_X_y(data2, fetch_func) # The first 23149 samples are the training samples From b1df646681803ef33bb6633d84e4031e9418db17 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Tue, 25 Feb 2020 19:35:20 +0100 Subject: [PATCH 10/15] Remove pandas hiding fixture [scipy-dev] --- sklearn/datasets/tests/conftest.py | 13 ------------- 
sklearn/datasets/tests/test_base.py | 1 - .../datasets/tests/test_california_housing.py | 17 ++++++++++------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index fddc22dd80c8f..6c23ecf5a9a96 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -63,16 +63,3 @@ def fetch_olivetti_faces(): @pytest.fixture def fetch_rcv1(): return _wrapped_fetch(_fetch_rcv1, dataset_name='rcv1') - - -@pytest.fixture -def hide_available_pandas(monkeypatch): - """ Pretend pandas was not installed. """ - import_orig = builtins.__import__ - - def mocked_import(name, *args, **kwargs): - if name == 'pandas': - raise ImportError() - return import_orig(name, *args, **kwargs) - - monkeypatch.setattr(builtins, '__import__', mocked_import) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index f3e7769d0d0f3..a58bdc9ed644d 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -257,7 +257,6 @@ def test_toy_dataset_as_frame(loader_func, data_dtype, target_dtype): load_linnerud, load_wine, ]) -@pytest.mark.usefixtures('hide_available_pandas') def test_toy_dataset_as_frame_no_pandas(loader_func): check_pandas_dependency_message(loader_func) diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index ced4a20fc902d..b5d2d7ce9d688 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -27,11 +27,14 @@ def test_fetch_asframe(fetch_california_housing): assert isinstance(bunch.target, pd.Series) -@pytest.mark.usefixtures('hide_available_pandas') def test_pandas_dependency_message(fetch_california_housing): - # Check that pandas is imported lazily and that an informative error - # message is raised when pandas is missing: - expected_msg = ('fetch_california_housing with as_frame=True' - ' requires pandas') - with pytest.raises(ImportError, match=expected_msg): - fetch_california_housing(as_frame=True) + try: + import pandas # noqa + pytest.skip("This test requires pandas to be not installed") + except ImportError: + # Check that pandas is imported lazily and that an informative error + # message is raised when pandas is missing: + expected_msg = ('fetch_california_housing with as_frame=True' + ' requires pandas') + with pytest.raises(ImportError, match=expected_msg): + fetch_california_housing(as_frame=True) From 25667a533f02e4fc004daab82550a5e507462b6f Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Tue, 25 Feb 2020 19:42:53 +0100 Subject: [PATCH 11/15] fix flake8 issue [scipy-dev] --- sklearn/datasets/tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index 6c23ecf5a9a96..b5e8f494f4f2e 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -1,6 +1,5 @@ """ Network tests are only run, if data is already locally available, or if download is specifically requested by environment variable.""" -import builtins from os import environ import pytest from sklearn.datasets import ( From cfaedfef57bf5922a539d7236925d5f844cb2e19 Mon Sep 17 00:00:00 2001 From: Roman Feldbauer Date: Thu, 27 Feb 2020 13:34:31 +0100 Subject: [PATCH 12/15] fix name collision [scipy-dev] --- sklearn/datasets/tests/conftest.py | 14 +++++----- sklearn/datasets/tests/test_20news.py | 28 +++++++++---------- 
.../datasets/tests/test_california_housing.py | 14 +++++----- sklearn/datasets/tests/test_covtype.py | 8 +++--- sklearn/datasets/tests/test_kddcup99.py | 22 +++++++-------- sklearn/datasets/tests/test_olivetti_faces.py | 6 ++-- sklearn/datasets/tests/test_rcv1.py | 8 +++--- 7 files changed, 50 insertions(+), 50 deletions(-) diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index b5e8f494f4f2e..9952fa05386a0 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -28,37 +28,37 @@ def wrapped(*args, **kwargs): @pytest.fixture -def fetch_20newsgroups(): +def fetch_20newsgroups_fxt(): return _wrapped_fetch(_fetch_20newsgroups, dataset_name='20newsgroups') @pytest.fixture -def fetch_20newsgroups_vectorized(): +def fetch_20newsgroups_vectorized_fxt(): return _wrapped_fetch(_fetch_20newsgroups_vectorized, dataset_name='20newsgroups_vectorized') @pytest.fixture -def fetch_california_housing(): +def fetch_california_housing_fxt(): return _wrapped_fetch(_fetch_california_housing, dataset_name='california_housing') @pytest.fixture -def fetch_covtype(): +def fetch_covtype_fxt(): return _wrapped_fetch(_fetch_covtype, dataset_name='covtype') @pytest.fixture -def fetch_kddcup99(): +def fetch_kddcup99_fxt(): return _wrapped_fetch(_fetch_kddcup99, dataset_name='kddcup99') @pytest.fixture -def fetch_olivetti_faces(): +def fetch_olivetti_faces_fxt(): return _wrapped_fetch(_fetch_olivetti_faces, dataset_name='olivetti_faces') @pytest.fixture -def fetch_rcv1(): +def fetch_rcv1_fxt(): return _wrapped_fetch(_fetch_rcv1, dataset_name='rcv1') diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index d56d05579e311..f800a49238ec1 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -11,11 +11,11 @@ from sklearn.preprocessing import normalize -def test_20news(fetch_20newsgroups): - data = fetch_20newsgroups(subset='all', shuffle=False) +def test_20news(fetch_20newsgroups_fxt): + data = fetch_20newsgroups_fxt(subset='all', shuffle=False) # Extract a reduced dataset - data2cats = fetch_20newsgroups( + data2cats = fetch_20newsgroups_fxt( subset='all', categories=data.target_names[-1:-3:-1], shuffle=False) # Check that the ordering of the target_names is the same # as the ordering in the full dataset @@ -36,53 +36,53 @@ def test_20news(fetch_20newsgroups): assert entry1 == entry2 # check that return_X_y option - X, y = fetch_20newsgroups(subset='all', shuffle=False, return_X_y=True) + X, y = fetch_20newsgroups_fxt(subset='all', shuffle=False, return_X_y=True) assert len(X) == len(data.data) assert y.shape == data.target.shape -def test_20news_length_consistency(fetch_20newsgroups): +def test_20news_length_consistency(fetch_20newsgroups_fxt): """Checks the length consistencies within the bunch This is a non-regression test for a bug present in 0.16.1. 
""" # Extract the full dataset - data = fetch_20newsgroups(subset='all') + data = fetch_20newsgroups_fxt(subset='all') assert len(data['data']) == len(data.data) assert len(data['target']) == len(data.target) assert len(data['filenames']) == len(data.filenames) -def test_20news_vectorized(fetch_20newsgroups_vectorized): +def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt): # test subset = train - bunch = fetch_20newsgroups_vectorized(subset="train") + bunch = fetch_20newsgroups_vectorized_fxt(subset="train") assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (11314, 130107) assert bunch.target.shape[0] == 11314 assert bunch.data.dtype == np.float64 # test subset = test - bunch = fetch_20newsgroups_vectorized(subset="test") + bunch = fetch_20newsgroups_vectorized_fxt(subset="test") assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (7532, 130107) assert bunch.target.shape[0] == 7532 assert bunch.data.dtype == np.float64 # test return_X_y option - fetch_func = partial(fetch_20newsgroups_vectorized, subset='test') + fetch_func = partial(fetch_20newsgroups_vectorized_fxt, subset='test') check_return_X_y(bunch, fetch_func) # test subset = all - bunch = fetch_20newsgroups_vectorized(subset='all') + bunch = fetch_20newsgroups_vectorized_fxt(subset='all') assert sp.isspmatrix_csr(bunch.data) assert bunch.data.shape == (11314 + 7532, 130107) assert bunch.target.shape[0] == 11314 + 7532 assert bunch.data.dtype == np.float64 -def test_20news_normalization(fetch_20newsgroups_vectorized): - X = fetch_20newsgroups_vectorized(normalize=False) - X_ = fetch_20newsgroups_vectorized(normalize=True) +def test_20news_normalization(fetch_20newsgroups_vectorized_fxt): + X = fetch_20newsgroups_vectorized_fxt(normalize=False) + X_ = fetch_20newsgroups_vectorized_fxt(normalize=True) X_norm = X_['data'][:100] X = X['data'][:100] diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index b5d2d7ce9d688..af1e1ff1370e1 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -7,19 +7,19 @@ from functools import partial -def test_fetch(fetch_california_housing): - data = fetch_california_housing() +def test_fetch(fetch_california_housing_fxt): + data = fetch_california_housing_fxt() assert((20640, 8) == data.data.shape) assert((20640, ) == data.target.shape) # test return_X_y option - fetch_func = partial(fetch_california_housing) + fetch_func = partial(fetch_california_housing_fxt) check_return_X_y(data, fetch_func) -def test_fetch_asframe(fetch_california_housing): +def test_fetch_asframe(fetch_california_housing_fxt): pd = pytest.importorskip('pandas') - bunch = fetch_california_housing(as_frame=True) + bunch = fetch_california_housing_fxt(as_frame=True) frame = bunch.frame assert hasattr(bunch, 'frame') is True assert frame.shape == (20640, 9) @@ -27,7 +27,7 @@ def test_fetch_asframe(fetch_california_housing): assert isinstance(bunch.target, pd.Series) -def test_pandas_dependency_message(fetch_california_housing): +def test_pandas_dependency_message(fetch_california_housing_fxt): try: import pandas # noqa pytest.skip("This test requires pandas to be not installed") @@ -37,4 +37,4 @@ def test_pandas_dependency_message(fetch_california_housing): expected_msg = ('fetch_california_housing with as_frame=True' ' requires pandas') with pytest.raises(ImportError, match=expected_msg): - fetch_california_housing(as_frame=True) + 
fetch_california_housing_fxt(as_frame=True) diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 1b127b0100f1f..d966e6c3890d0 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -6,9 +6,9 @@ from functools import partial -def test_fetch(fetch_covtype): - data1 = fetch_covtype(shuffle=True, random_state=42) - data2 = fetch_covtype(shuffle=True, random_state=37) +def test_fetch(fetch_covtype_fxt): + data1 = fetch_covtype_fxt(shuffle=True, random_state=42) + data2 = fetch_covtype_fxt(shuffle=True, random_state=37) X1, X2 = data1['data'], data2['data'] assert (581012, 54) == X1.shape @@ -21,5 +21,5 @@ def test_fetch(fetch_covtype): assert (X1.shape[0],) == y2.shape # test return_X_y option - fetch_func = partial(fetch_covtype) + fetch_func = partial(fetch_covtype_fxt) check_return_X_y(data1, fetch_func) diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index f0827290ce2e8..899abd2bcb153 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -10,37 +10,37 @@ from functools import partial -def test_percent10(fetch_kddcup99): - data = fetch_kddcup99() +def test_percent10(fetch_kddcup99_fxt): + data = fetch_kddcup99_fxt() assert data.data.shape == (494021, 41) assert data.target.shape == (494021,) - data_shuffled = fetch_kddcup99(shuffle=True, random_state=0) + data_shuffled = fetch_kddcup99_fxt(shuffle=True, random_state=0) assert data.data.shape == data_shuffled.data.shape assert data.target.shape == data_shuffled.target.shape - data = fetch_kddcup99('SA') + data = fetch_kddcup99_fxt('SA') assert data.data.shape == (100655, 41) assert data.target.shape == (100655,) - data = fetch_kddcup99('SF') + data = fetch_kddcup99_fxt('SF') assert data.data.shape == (73237, 4) assert data.target.shape == (73237,) - data = fetch_kddcup99('http') + data = fetch_kddcup99_fxt('http') assert data.data.shape == (58725, 3) assert data.target.shape == (58725,) - data = fetch_kddcup99('smtp') + data = fetch_kddcup99_fxt('smtp') assert data.data.shape == (9571, 3) assert data.target.shape == (9571,) - fetch_func = partial(fetch_kddcup99, 'smtp') + fetch_func = partial(fetch_kddcup99_fxt, 'smtp') check_return_X_y(data, fetch_func) -def test_shuffle(fetch_kddcup99): - dataset = fetch_kddcup99(random_state=0, subset='SA', shuffle=True, - percent10=True) +def test_shuffle(fetch_kddcup99_fxt): + dataset = fetch_kddcup99_fxt(random_state=0, subset='SA', shuffle=True, + percent10=True) assert(any(dataset.target[-100:] == b'normal.')) diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index cb76b1f1e87a8..f0c7aa1216e76 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -10,8 +10,8 @@ from sklearn.utils._testing import assert_array_equal -def test_olivetti_faces(fetch_olivetti_faces): - data = fetch_olivetti_faces(shuffle=True, random_state=0) +def test_olivetti_faces(fetch_olivetti_faces_fxt): + data = fetch_olivetti_faces_fxt(shuffle=True, random_state=0) assert isinstance(data, Bunch) for expected_keys in ('data', 'images', 'target', 'DESCR'): @@ -23,4 +23,4 @@ def test_olivetti_faces(fetch_olivetti_faces): assert_array_equal(np.unique(np.sort(data.target)), np.arange(40)) # test the return_X_y option - check_return_X_y(data, fetch_olivetti_faces) + check_return_X_y(data, fetch_olivetti_faces_fxt) diff --git 
a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index f7ecb0e8c7199..2c21201dce40e 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -10,8 +10,8 @@ from sklearn.utils._testing import assert_array_equal -def test_fetch_rcv1(fetch_rcv1): - data1 = fetch_rcv1(shuffle=False) +def test_fetch_rcv1(fetch_rcv1_fxt): + data1 = fetch_rcv1_fxt(shuffle=False) X1, Y1 = data1.data, data1.target cat_list, s1 = data1.target_names.tolist(), data1.sample_id @@ -39,12 +39,12 @@ def test_fetch_rcv1(fetch_rcv1): assert num == Y1[:, j].data.size # test shuffling and subset - data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77) + data2 = fetch_rcv1_fxt(shuffle=True, subset='train', random_state=77) X2, Y2 = data2.data, data2.target s2 = data2.sample_id # test return_X_y option - fetch_func = partial(fetch_rcv1, shuffle=False, subset='train') + fetch_func = partial(fetch_rcv1_fxt, shuffle=False, subset='train') check_return_X_y(data2, fetch_func) # The first 23149 samples are the training samples From dce862c8390e9cf555e4f51000705c6c5e05109a Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 1 Mar 2020 11:31:07 -0500 Subject: [PATCH 13/15] BLD [scipy-dev] From df87928feaf81ebf02c00be937806e2c764c51db Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 1 Mar 2020 21:38:06 -0500 Subject: [PATCH 14/15] STY Removes prefix from fetch_* --- sklearn/datasets/tests/conftest.py | 31 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index 9952fa05386a0..85242d7335685 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -2,15 +2,13 @@ or if download is specifically requested by environment variable.""" from os import environ import pytest -from sklearn.datasets import ( - fetch_20newsgroups as _fetch_20newsgroups, - fetch_20newsgroups_vectorized as _fetch_20newsgroups_vectorized, - fetch_california_housing as _fetch_california_housing, - fetch_covtype as _fetch_covtype, - fetch_kddcup99 as _fetch_kddcup99, - fetch_olivetti_faces as _fetch_olivetti_faces, - fetch_rcv1 as _fetch_rcv1, -) +from sklearn.datasets import fetch_20newsgroups +from sklearn.datasets import fetch_20newsgroups_vectorized +from sklearn.datasets import fetch_california_housing +from sklearn.datasets import fetch_covtype +from sklearn.datasets import fetch_kddcup99 +from sklearn.datasets import fetch_olivetti_faces +from sklearn.datasets import fetch_rcv1 def _wrapped_fetch(f, dataset_name): @@ -23,42 +21,41 @@ def wrapped(*args, **kwargs): return f(*args, **kwargs) except IOError: pytest.skip("Download {} to run this test".format(dataset_name)) - return wrapped @pytest.fixture def fetch_20newsgroups_fxt(): - return _wrapped_fetch(_fetch_20newsgroups, dataset_name='20newsgroups') + return _wrapped_fetch(fetch_20newsgroups, dataset_name='20newsgroups') @pytest.fixture def fetch_20newsgroups_vectorized_fxt(): - return _wrapped_fetch(_fetch_20newsgroups_vectorized, + return _wrapped_fetch(fetch_20newsgroups_vectorized, dataset_name='20newsgroups_vectorized') @pytest.fixture def fetch_california_housing_fxt(): - return _wrapped_fetch(_fetch_california_housing, + return _wrapped_fetch(fetch_california_housing, dataset_name='california_housing') @pytest.fixture def fetch_covtype_fxt(): - return _wrapped_fetch(_fetch_covtype, dataset_name='covtype') + return _wrapped_fetch(fetch_covtype, dataset_name='covtype') 
@pytest.fixture def fetch_kddcup99_fxt(): - return _wrapped_fetch(_fetch_kddcup99, dataset_name='kddcup99') + return _wrapped_fetch(fetch_kddcup99, dataset_name='kddcup99') @pytest.fixture def fetch_olivetti_faces_fxt(): - return _wrapped_fetch(_fetch_olivetti_faces, dataset_name='olivetti_faces') + return _wrapped_fetch(fetch_olivetti_faces, dataset_name='olivetti_faces') @pytest.fixture def fetch_rcv1_fxt(): - return _wrapped_fetch(_fetch_rcv1, dataset_name='rcv1') + return _wrapped_fetch(fetch_rcv1, dataset_name='rcv1') From 089890f71c34c76e2b41a7214ce9cdaf758e86f2 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 1 Mar 2020 21:39:08 -0500 Subject: [PATCH 15/15] BLD [scipy-dev]
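
Taken together, the series replaces the old SkipTest-based guards in the individual dataset test modules with shared pytest fixtures in sklearn/datasets/tests/conftest.py: each fetch_* function is wrapped so that download_if_missing is True only when SKLEARN_SKIP_NETWORK_TESTS=0 is exported (as in the Travis cron job), and pytest.skip is raised on IOError when the data cannot be loaded from data_home. The condensed sketch below is not part of any patch above; it merges the conftest.py fixture and the covtype test into a single file purely to illustrate the resulting pattern, using the same names and shapes that appear in the diffs.

    from os import environ

    import pytest
    from sklearn.datasets import fetch_covtype


    def _wrapped_fetch(f, dataset_name):
        """Fetch dataset (download only if requested by environment)."""
        # Download only when explicitly requested via the environment variable.
        download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0'

        def wrapped(*args, **kwargs):
            kwargs['download_if_missing'] = download_if_missing
            try:
                return f(*args, **kwargs)
            except IOError:
                pytest.skip("Download {} to run this test".format(dataset_name))
        return wrapped


    @pytest.fixture
    def fetch_covtype_fxt():
        return _wrapped_fetch(fetch_covtype, dataset_name='covtype')


    def test_fetch(fetch_covtype_fxt):
        # The wrapped fetcher behaves like fetch_covtype, but the test is
        # skipped rather than failed when the data is unavailable locally.
        data = fetch_covtype_fxt(shuffle=True, random_state=42)
        assert data['data'].shape == (581012, 54)

Because the skip decision now lives inside the fixture, the test modules themselves contain no download or skip logic and simply request the fixture by name, which is why the later patches rename the fixtures with an _fxt suffix to avoid shadowing the real sklearn.datasets fetchers.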