Merged
Changes from all commits
41 commits
eb4d179
Remove deprecated (0.18) cross_validation.py in favor of model_selection
Nov 9, 2017
6dfe9aa
Fix imports (from cross_validation module to model_selection module)
Nov 9, 2017
af42424
Remove tests checking old implementation
Nov 9, 2017
2362011
Remove grid_search and learning_curve also deprecated
Nov 9, 2017
776bba1
Remove gaussian_process
Nov 9, 2017
59e3f7d
remove code to be removed in 0.19
Nov 9, 2017
2ec39c0
remove ransac's residual_metric
Nov 9, 2017
c444763
remove RandomizedPCA (also from docs references etc)
amueller Sep 8, 2017
a2e40d7
remove references to old GP, GMM and sparse_center_data
amueller Sep 8, 2017
7d4b2c1
more cleanup of deprecated scorers
amueller Sep 8, 2017
2ffa7bd
More in scoring
Nov 10, 2017
0bf4146
Remove `hamming_loss` deprecated parameter `classes`
Nov 10, 2017
b36341e
splitter classes (issue:6660) Fix minor stuff
Nov 10, 2017
4b7aa69
Fix doctest expected output
Nov 10, 2017
67d711f
merge
amueller May 22, 2018
195fcf3
unused imports
amueller May 22, 2018
daa5e4b
add vscode to gitignore
amueller May 22, 2018
b72c9b9
delete files again after botched merge.
amueller May 22, 2018
f30720a
import fix
amueller May 24, 2018
f114920
Merge branch 'master' into 0_20_deprecations
amueller May 26, 2018
048a5ca
pep8
amueller May 26, 2018
ddd45bd
pep8
amueller May 26, 2018
49b1498
delete old GMM
amueller May 26, 2018
3144d76
remove deprecated scorers
amueller May 26, 2018
ec66b5d
attributes X_, y_ in isotonic
amueller May 26, 2018
9a879e1
many more deprecations
amueller May 26, 2018
d50d694
remove grid_scores_ tests
amueller May 26, 2018
278add7
export_graphviz out_file deprecation
amueller May 26, 2018
40939f4
LDA learning_method changed to batch
amueller May 26, 2018
2a34f54
change clone test from deprecation warning to error.
amueller May 26, 2018
a53c000
sphinx formatting
amueller May 26, 2018
22a7cde
remove removed classes and functions from classes.rst
amueller May 26, 2018
5d3fbe9
fix doctests
amueller May 26, 2018
cd48afa
Merge branch 'master' into 0_20_deprecations
amueller Jun 4, 2018
9ca4f47
address jnothman's comments; some minor fixes
amueller Jun 4, 2018
5b8746b
Merge branch 'master' into 0_20_deprecations
amueller Jun 4, 2018
fab56a3
keep randomized PCA in incremental benchmark.
amueller Jun 4, 2018
5ca5cfc
Revert change to estimator_checks about GaussianProcessRegressor
jnothman Jun 24, 2018
4bb61c9
Merge branch 'master' into HEAD
jnothman Jun 24, 2018
bdf90c4
Clean up revert of removing rpca from benchmark
jnothman Jun 24, 2018
ee5710d
Fix exception of GPR
jnothman Jun 24, 2018
1 change: 1 addition & 0 deletions .gitignore
@@ -54,6 +54,7 @@ benchmarks/bench_covertype_data/
*.prefs
.pydevproject
.idea
.vscode

*.c
*.cpp
14 changes: 4 additions & 10 deletions benchmarks/bench_plot_incremental_pca.py
@@ -13,7 +13,7 @@
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import IncrementalPCA, RandomizedPCA, PCA
from sklearn.decomposition import IncrementalPCA, PCA


def plot_results(X, y, label):
@@ -37,7 +37,6 @@ def plot_feature_times(all_times, batch_size, all_components, data):
plot_results(all_components, all_times['pca'], label="PCA")
plot_results(all_components, all_times['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_times['rpca'], label="RandomizedPCA")
plt.legend(loc="upper left")
plt.suptitle("Algorithm runtime vs. n_components\n \
LFW, size %i x %i" % data.shape)
@@ -50,7 +49,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
plot_results(all_components, all_errors['pca'], label="PCA")
plot_results(all_components, all_errors['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_errors['rpca'], label="RandomizedPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm error vs. n_components\n"
"LFW, size %i x %i" % data.shape)
@@ -61,7 +59,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
def plot_batch_times(all_times, n_features, all_batch_sizes, data):
plt.figure()
plot_results(all_batch_sizes, all_times['pca'], label="PCA")
plot_results(all_batch_sizes, all_times['rpca'], label="RandomizedPCA")
plot_results(all_batch_sizes, all_times['ipca'], label="IncrementalPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm runtime vs. batch_size for n_components %i\n \
@@ -92,11 +89,9 @@ def fixed_batch_size_comparison(data):
all_errors = defaultdict(list)
for n_components in all_features:
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('ipca', ipca),
('rpca', rpca)]}
('ipca', ipca)]}

for k in sorted(results_dict.keys()):
all_times[k].append(results_dict[k]['time'])
@@ -116,7 +111,8 @@ def variable_batch_size_comparison(data):
all_times = defaultdict(list)
all_errors = defaultdict(list)
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
Review comment (Member): should we keep PCA(svd_solver='randomized') here?

rpca = PCA(n_components=n_components, svd_solver='randomized',
random_state=1999)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('rpca', rpca)]}

@@ -138,8 +134,6 @@ def variable_batch_size_comparison(data):
all_errors['ipca'].append(results_dict['ipca']['error'])

plot_batch_times(all_times, n_components, batch_sizes, data)
# RandomizedPCA error is always worse (approx 100x) than other PCA
# tests
plot_batch_errors(all_errors, n_components, batch_sizes, data)

faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
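For context on the review question above, a minimal sketch of the substitution the benchmark now relies on: the removed RandomizedPCA maps to PCA with svd_solver='randomized'. The toy data and component count below are illustrative, not taken from the benchmark.

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(1999)
X = rng.rand(100, 20)  # illustrative data standing in for the LFW faces

# Before 0.20: RandomizedPCA(n_components=5, random_state=1999)
# After:       the randomized solver is an option of the main PCA class.
rpca = PCA(n_components=5, svd_solver='randomized', random_state=1999)
print(rpca.fit_transform(X).shape)  # (100, 5)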
43 changes: 0 additions & 43 deletions doc/modules/classes.rst
@@ -1511,46 +1511,3 @@ To be removed in 0.21

datasets.load_mlcomp
linear_model.lasso_stability_path


To be removed in 0.20
---------------------

.. autosummary::
:toctree: generated/
:template: deprecated_class.rst

cross_validation.KFold
cross_validation.LabelKFold
cross_validation.LeaveOneLabelOut
cross_validation.LeaveOneOut
cross_validation.LeavePOut
cross_validation.LeavePLabelOut
cross_validation.LabelShuffleSplit
cross_validation.ShuffleSplit
cross_validation.StratifiedKFold
cross_validation.StratifiedShuffleSplit
cross_validation.PredefinedSplit
decomposition.RandomizedPCA
gaussian_process.GaussianProcess
grid_search.ParameterGrid
grid_search.ParameterSampler
grid_search.GridSearchCV
grid_search.RandomizedSearchCV
mixture.DPGMM
mixture.GMM
mixture.VBGMM


.. autosummary::
:toctree: generated/
:template: deprecated_function.rst

cross_validation.check_cv
cross_validation.cross_val_predict
cross_validation.cross_val_score
cross_validation.permutation_test_score
cross_validation.train_test_split
grid_search.fit_grid_point
learning_curve.learning_curve
learning_curve.validation_curve
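As a hedged reference for readers hitting these removals, the replacements all live in the stable API introduced in 0.18; a short, non-exhaustive sketch of the most common mappings:

# cross_validation.*, grid_search.* and learning_curve.*  ->  model_selection.*
from sklearn.model_selection import (KFold, GroupKFold, train_test_split,
                                     cross_val_score, GridSearchCV,
                                     learning_curve, validation_curve)
# mixture.GMM / VBGMM / DPGMM  ->  GaussianMixture / BayesianGaussianMixture
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
# gaussian_process.GaussianProcess  ->  GaussianProcessRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
# decomposition.RandomizedPCA  ->  PCA(svd_solver='randomized')
from sklearn.decomposition import PCA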
5 changes: 2 additions & 3 deletions doc/modules/preprocessing.rst
@@ -235,9 +235,8 @@ data.
independently, since a downstream model can further make some assumption
on the linear independence of the features.

To address this issue you can use :class:`sklearn.decomposition.PCA`
or :class:`sklearn.decomposition.RandomizedPCA` with ``whiten=True``
to further remove the linear correlation across features.
To address this issue you can use :class:`sklearn.decomposition.PCA` with
``whiten=True`` to further remove the linear correlation across features.

.. topic:: Scaling a 1D array

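A minimal sketch of the recommendation in the rewritten paragraph, using PCA with whiten=True on toy data (the component count is illustrative):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(200, 10)

# whiten=True rescales the projected components to unit variance, removing
# the linear correlation across the output features.
X_white = PCA(n_components=5, whiten=True).fit_transform(X)
print(np.cov(X_white, rowvar=False).round(2))  # close to the identity matrix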
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/index.rst
@@ -100,7 +100,7 @@ Click on any estimator in the chart below to see its documentation.
<area href="../../modules/svm.html#classification" title="SVC Documentation" shape="poly" coords="210,157, 210,157, 210,194, 210,194, 210,194, 333,194, 333,194, 333,194, 333,157, 333,157, 333,157, 210,157, 210,157" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/svm.html#regression" title="SVR Documentation" shape="poly" coords="1696,692, 1696,692, 1696,732, 1696,732, 1696,732, 1890,732, 1890,732, 1890,732, 1890,692, 1890,692, 1890,692, 1696,692, 1696,692" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/svm.html#regression" title="SVR Documentation" shape="poly" coords="1831,458, 1831,458, 1831,496, 1831,496, 1831,496, 2052,496, 2052,496, 2052,496, 2052,458, 2052,458, 2052,458, 1831,458, 1831,458" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/mixture.html#vbgmm-classifier-variational-gaussian-mixtures" title=" VBGMM Documentation" shape="poly" coords="562,994, 562,994, 562,1026, 562,1026, 562,1026, 682,1026, 682,1026, 682,1026, 682,994, 682,994, 682,994, 562,994, 562,994" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/mixture.html#bgmm" title=" Bayesian GMM Documentation" shape="poly" coords="562,994, 562,994, 562,1026, 562,1026, 562,1026, 682,1026, 682,1026, 682,1026, 682,994, 682,994, 682,994, 562,994, 562,994" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
</map>
</img>
</p>
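The remapped area now points at the Bayesian Gaussian mixture documentation; as a hedged sketch, the estimator replacing the removed VBGMM/DPGMM classes is BayesianGaussianMixture (toy blobs below, parameters illustrative):

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(loc, 0.5, size=(50, 2)) for loc in (-3, 0, 3)])

# Variational inference with a Dirichlet-process prior prunes unused components.
bgmm = BayesianGaussianMixture(
    n_components=5, weight_concentration_prior_type='dirichlet_process',
    random_state=0).fit(X)
print(bgmm.weights_.round(2))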
@@ -75,7 +75,7 @@

# Choose cross-validation techniques for the inner and outer loops,
# independently of the dataset.
# E.g "LabelKFold", "LeaveOneOut", "LeaveOneLabelOut", etc.
# E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

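The updated comment names the group-aware splitters that replaced the Label* classes; a minimal standalone sketch with toy groups (not the setup of the example file):

import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(24).reshape(12, 2)
y = np.tile([0, 1], 6)
groups = np.repeat([0, 1, 2, 3], 3)  # e.g. one group per subject

# All samples of a group stay on one side of the split (formerly LabelKFold).
for train_idx, test_idx in GroupKFold(n_splits=4).split(X, y, groups=groups):
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])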
9 changes: 4 additions & 5 deletions sklearn/__init__.py
@@ -65,11 +65,10 @@
__check_build # avoid flakes unused variable error

__all__ = ['calibration', 'cluster', 'covariance', 'cross_decomposition',
'cross_validation', 'datasets', 'decomposition', 'dummy',
'ensemble', 'exceptions', 'externals', 'feature_extraction',
'feature_selection', 'gaussian_process', 'grid_search',
'isotonic', 'kernel_approximation', 'kernel_ridge',
'learning_curve', 'linear_model', 'manifold', 'metrics',
'datasets', 'decomposition', 'dummy', 'ensemble', 'exceptions',
'externals', 'feature_extraction', 'feature_selection',
'gaussian_process', 'isotonic', 'kernel_approximation',
'kernel_ridge', 'linear_model', 'manifold', 'metrics',
'mixture', 'model_selection', 'multiclass', 'multioutput',
'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
'preprocessing', 'random_projection', 'semi_supervised',
51 changes: 2 additions & 49 deletions sklearn/base.py
@@ -67,57 +67,10 @@ def clone(estimator, safe=True):
for name in new_object_params:
param1 = new_object_params[name]
param2 = params_set[name]
if param1 is param2:
# this should always happen
continue
if isinstance(param1, np.ndarray):
# For most ndarrays, we do not test for complete equality
if not isinstance(param2, type(param1)):
equality_test = False
elif (param1.ndim > 0
and param1.shape[0] > 0
and isinstance(param2, np.ndarray)
and param2.ndim > 0
and param2.shape[0] > 0):
equality_test = (
param1.shape == param2.shape
and param1.dtype == param2.dtype
and (_first_and_last_element(param1) ==
_first_and_last_element(param2))
)
else:
equality_test = np.all(param1 == param2)
elif sparse.issparse(param1):
# For sparse matrices equality doesn't work
if not sparse.issparse(param2):
equality_test = False
elif param1.size == 0 or param2.size == 0:
equality_test = (
param1.__class__ == param2.__class__
and param1.size == 0
and param2.size == 0
)
else:
equality_test = (
param1.__class__ == param2.__class__
and (_first_and_last_element(param1) ==
_first_and_last_element(param2))
and param1.nnz == param2.nnz
and param1.shape == param2.shape
)
else:
# fall back on standard equality
equality_test = param1 == param2
if equality_test:
warnings.warn("Estimator %s modifies parameters in __init__."
" This behavior is deprecated as of 0.18 and "
"support for this behavior will be removed in 0.20."
% type(estimator).__name__, DeprecationWarning)
else:
if param1 is not param2:
raise RuntimeError('Cannot clone object %s, as the constructor '
'does not seem to set parameter %s' %
'either does not set or modifies parameter %s' %
(estimator, name))

return new_object


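To make the behavioral change above concrete: an estimator whose __init__ stores a modified copy of a parameter used to emit a DeprecationWarning and, after this diff, fails clone outright. A hedged sketch with a hypothetical BadEstimator:

from sklearn.base import BaseEstimator, clone


class BadEstimator(BaseEstimator):
    """Hypothetical estimator that (incorrectly) copies a parameter in __init__."""

    def __init__(self, options=None):
        # Storing a new object instead of the passed-in one breaks clone's
        # identity check on the reconstructed estimator.
        self.options = dict(options or {})


try:
    clone(BadEstimator(options={'alpha': 1.0}))
except RuntimeError as exc:
    print(exc)  # "... either does not set or modifies parameter options"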
4 changes: 3 additions & 1 deletion sklearn/cluster/tests/test_hierarchical.py
@@ -51,6 +51,7 @@ def test_deprecation_of_n_components_in_linkage_tree():
assert_equal(n_leaves, n_leaves_t)
assert_equal(parent, parent_t)


def test_linkage_misc():
# Misc tests on linkage
rng = np.random.RandomState(42)
@@ -511,7 +512,8 @@ def test_connectivity_callable():
connectivity = kneighbors_graph(X, 3, include_self=False)
aglc1 = AgglomerativeClustering(connectivity=connectivity)
aglc2 = AgglomerativeClustering(
connectivity=partial(kneighbors_graph, n_neighbors=3, include_self=False))
connectivity=partial(kneighbors_graph, n_neighbors=3,
include_self=False))
aglc1.fit(X)
aglc2.fit(X)
assert_array_equal(aglc1.labels_, aglc2.labels_)
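The test touched above exercises a callable connectivity; as a hedged standalone sketch (toy blobs, illustrative neighbor count):

from functools import partial

from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.neighbors import kneighbors_graph

X, _ = make_blobs(n_samples=60, centers=3, random_state=0)

# A callable connectivity is evaluated on the data passed to fit, equivalent
# here to precomputing kneighbors_graph(X, 3, include_self=False).
model = AgglomerativeClustering(
    n_clusters=3,
    connectivity=partial(kneighbors_graph, n_neighbors=3, include_self=False))
print(model.fit(X).labels_[:10])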