Merged
Changes from all commits
41 commits
eb4d179
Remove deprecated (0.18) cross_validation.py in favor of model_selection
Nov 9, 2017
6dfe9aa
Fix imports (from cross_validation module to model_selection module)
Nov 9, 2017
af42424
Remove tests checking old implementation
Nov 9, 2017
2362011
Remove grid_search and learning_curve also deprecated
Nov 9, 2017
776bba1
Remove gaussian_process
Nov 9, 2017
59e3f7d
remove code to be removed in 0.19
Nov 9, 2017
2ec39c0
remove ransac's residual_metric
Nov 9, 2017
c444763
remove RandomizedPCA (also from docs references etc)
amueller Sep 8, 2017
a2e40d7
remove references to old GP, GMM and sparse_center_data
amueller Sep 8, 2017
7d4b2c1
more cleanup of deprecated scorers
amueller Sep 8, 2017
2ffa7bd
More in scoring
Nov 10, 2017
0bf4146
Remove `hamming_loss` deprecated parameter `classes`
Nov 10, 2017
b36341e
splitter classes (issue:6660) Fix minor stuff
Nov 10, 2017
4b7aa69
Fix doctest expected output
Nov 10, 2017
67d711f
merge
amueller May 22, 2018
195fcf3
unused imports
amueller May 22, 2018
daa5e4b
add vscode to gitignore
amueller May 22, 2018
b72c9b9
delete files again after botched merge.
amueller May 22, 2018
f30720a
import fix
amueller May 24, 2018
f114920
Merge branch 'master' into 0_20_deprecations
amueller May 26, 2018
048a5ca
pep8
amueller May 26, 2018
ddd45bd
pep8
amueller May 26, 2018
49b1498
delete old GMM
amueller May 26, 2018
3144d76
remove deprecated scorers
amueller May 26, 2018
ec66b5d
attributes X_, y_ in isotonic
amueller May 26, 2018
9a879e1
many more deprecations
amueller May 26, 2018
d50d694
remove grid_scores_ tests
amueller May 26, 2018
278add7
export_graphviz out_file deprecation
amueller May 26, 2018
40939f4
LDA learning_method changed to batch
amueller May 26, 2018
2a34f54
change clone test from deprecation warning to error.
amueller May 26, 2018
a53c000
sphinx formatting
amueller May 26, 2018
22a7cde
remove removed classes and functions from classes.rst
amueller May 26, 2018
5d3fbe9
fix doctests
amueller May 26, 2018
cd48afa
Merge branch 'master' into 0_20_deprecations
amueller Jun 4, 2018
9ca4f47
address jnothman's comments; some minor fixes
amueller Jun 4, 2018
5b8746b
Merge branch 'master' into 0_20_deprecations
amueller Jun 4, 2018
fab56a3
keep randomized PCA in incremental benchmark.
amueller Jun 4, 2018
5ca5cfc
Revert change to estimator_checks about GaussianProcessRegressor
jnothman Jun 24, 2018
4bb61c9
Merge branch 'master' into HEAD
jnothman Jun 24, 2018
bdf90c4
Clean up revert of removing rpca from benchmark
jnothman Jun 24, 2018
ee5710d
Fix exception of GPR
jnothman Jun 24, 2018
1 change: 1 addition & 0 deletions .gitignore
@@ -54,6 +54,7 @@ benchmarks/bench_covertype_data/
*.prefs
.pydevproject
.idea
.vscode

*.c
*.cpp
14 changes: 4 additions & 10 deletions benchmarks/bench_plot_incremental_pca.py
@@ -13,7 +13,7 @@
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import IncrementalPCA, RandomizedPCA, PCA
from sklearn.decomposition import IncrementalPCA, PCA


def plot_results(X, y, label):
@@ -37,7 +37,6 @@ def plot_feature_times(all_times, batch_size, all_components, data):
plot_results(all_components, all_times['pca'], label="PCA")
plot_results(all_components, all_times['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_times['rpca'], label="RandomizedPCA")
plt.legend(loc="upper left")
plt.suptitle("Algorithm runtime vs. n_components\n \
LFW, size %i x %i" % data.shape)
@@ -50,7 +49,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
plot_results(all_components, all_errors['pca'], label="PCA")
plot_results(all_components, all_errors['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_errors['rpca'], label="RandomizedPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm error vs. n_components\n"
"LFW, size %i x %i" % data.shape)
@@ -61,7 +59,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
def plot_batch_times(all_times, n_features, all_batch_sizes, data):
plt.figure()
plot_results(all_batch_sizes, all_times['pca'], label="PCA")
plot_results(all_batch_sizes, all_times['rpca'], label="RandomizedPCA")
plot_results(all_batch_sizes, all_times['ipca'], label="IncrementalPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm runtime vs. batch_size for n_components %i\n \
@@ -92,11 +89,9 @@ def fixed_batch_size_comparison(data):
all_errors = defaultdict(list)
for n_components in all_features:
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('ipca', ipca),
('rpca', rpca)]}
('ipca', ipca)]}

for k in sorted(results_dict.keys()):
all_times[k].append(results_dict[k]['time'])
@@ -116,7 +111,8 @@ def variable_batch_size_comparison(data):
all_times = defaultdict(list)
all_errors = defaultdict(list)
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
Review comment (Member): should we keep PCA(svd_solver='randomized') here?

rpca = PCA(n_components=n_components, svd_solver='randomized',
random_state=1999)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('rpca', rpca)]}

@@ -138,8 +134,6 @@ def variable_batch_size_comparison(data):
all_errors['ipca'].append(results_dict['ipca']['error'])

plot_batch_times(all_times, n_components, batch_sizes, data)
# RandomizedPCA error is always worse (approx 100x) than other PCA
# tests
plot_batch_errors(all_errors, n_components, batch_sizes, data)

faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
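For context on the review question above, a minimal sketch of the substitution the benchmark now relies on: the removed RandomizedPCA maps to PCA with svd_solver='randomized'. The toy data and component count below are illustrative, not taken from the benchmark.

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(1999)
X = rng.rand(100, 20)  # illustrative data standing in for the LFW faces

# Before 0.20: RandomizedPCA(n_components=5, random_state=1999)
# After:       the randomized solver is an option of the main PCA class.
rpca = PCA(n_components=5, svd_solver='randomized', random_state=1999)
print(rpca.fit_transform(X).shape)  # (100, 5)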
43 changes: 0 additions & 43 deletions doc/modules/classes.rst
@@ -1511,46 +1511,3 @@ To be removed in 0.21

datasets.load_mlcomp
linear_model.lasso_stability_path


To be removed in 0.20
---------------------

.. autosummary::
:toctree: generated/
:template: deprecated_class.rst

cross_validation.KFold
cross_validation.LabelKFold
cross_validation.LeaveOneLabelOut
cross_validation.LeaveOneOut
cross_validation.LeavePOut
cross_validation.LeavePLabelOut
cross_validation.LabelShuffleSplit
cross_validation.ShuffleSplit
cross_validation.StratifiedKFold
cross_validation.StratifiedShuffleSplit
cross_validation.PredefinedSplit
decomposition.RandomizedPCA
gaussian_process.GaussianProcess
grid_search.ParameterGrid
grid_search.ParameterSampler
grid_search.GridSearchCV
grid_search.RandomizedSearchCV
mixture.DPGMM
mixture.GMM
mixture.VBGMM


.. autosummary::
:toctree: generated/
:template: deprecated_function.rst

cross_validation.check_cv
cross_validation.cross_val_predict
cross_validation.cross_val_score
cross_validation.permutation_test_score
cross_validation.train_test_split
grid_search.fit_grid_point
learning_curve.learning_curve
learning_curve.validation_curve
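As a hedged reference for readers hitting these removals, the replacements all live in the stable API introduced in 0.18; a short, non-exhaustive sketch of the most common mappings:

# cross_validation.*, grid_search.* and learning_curve.*  ->  model_selection.*
from sklearn.model_selection import (KFold, GroupKFold, train_test_split,
                                     cross_val_score, GridSearchCV,
                                     learning_curve, validation_curve)
# mixture.GMM / VBGMM / DPGMM  ->  GaussianMixture / BayesianGaussianMixture
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
# gaussian_process.GaussianProcess  ->  GaussianProcessRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
# decomposition.RandomizedPCA  ->  PCA(svd_solver='randomized')
from sklearn.decomposition import PCA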
5 changes: 2 additions & 3 deletions doc/modules/preprocessing.rst
@@ -235,9 +235,8 @@ data.
independently, since a downstream model can further make some assumption
on the linear independence of the features.

To address this issue you can use :class:`sklearn.decomposition.PCA`
or :class:`sklearn.decomposition.RandomizedPCA` with ``whiten=True``
to further remove the linear correlation across features.
To address this issue you can use :class:`sklearn.decomposition.PCA` with
``whiten=True`` to further remove the linear correlation across features.

.. topic:: Scaling a 1D array

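A minimal sketch of the recommendation in the rewritten paragraph, using PCA with whiten=True on toy data (the component count is illustrative):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(200, 10)

# whiten=True rescales the projected components to unit variance, removing
# the linear correlation across the output features.
X_white = PCA(n_components=5, whiten=True).fit_transform(X)
print(np.cov(X_white, rowvar=False).round(2))  # close to the identity matrix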
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/index.rst
@@ -100,7 +100,7 @@ Click on any estimator in the chart below to see its documentation.
<area href="../../modules/svm.html#classification" title="SVC Documentation" shape="poly" coords="210,157, 210,157, 210,194, 210,194, 210,194, 333,194, 333,194, 333,194, 333,157, 333,157, 333,157, 210,157, 210,157" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/svm.html#regression" title="SVR Documentation" shape="poly" coords="1696,692, 1696,692, 1696,732, 1696,732, 1696,732, 1890,732, 1890,732, 1890,732, 1890,692, 1890,692, 1890,692, 1696,692, 1696,692" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/svm.html#regression" title="SVR Documentation" shape="poly" coords="1831,458, 1831,458, 1831,496, 1831,496, 1831,496, 2052,496, 2052,496, 2052,496, 2052,458, 2052,458, 2052,458, 1831,458, 1831,458" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/mixture.html#vbgmm-classifier-variational-gaussian-mixtures" title=" VBGMM Documentation" shape="poly" coords="562,994, 562,994, 562,1026, 562,1026, 562,1026, 682,1026, 682,1026, 682,1026, 682,994, 682,994, 682,994, 562,994, 562,994" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
<area href="../../modules/mixture.html#bgmm" title=" Bayesian GMM Documentation" shape="poly" coords="562,994, 562,994, 562,1026, 562,1026, 562,1026, 682,1026, 682,1026, 682,1026, 682,994, 682,994, 682,994, 562,994, 562,994" data-maphilight='{"strokeColor":"0000ff","strokeWidth":5,"fillColor":"66FF66","fillOpacity":0.4}'></area>
</map>
</img>
</p>
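The remapped area now points at the Bayesian Gaussian mixture documentation; as a hedged sketch, the estimator replacing the removed VBGMM/DPGMM classes is BayesianGaussianMixture (toy blobs below, parameters illustrative):

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(loc, 0.5, size=(50, 2)) for loc in (-3, 0, 3)])

# Variational inference with a Dirichlet-process prior prunes unused components.
bgmm = BayesianGaussianMixture(
    n_components=5, weight_concentration_prior_type='dirichlet_process',
    random_state=0).fit(X)
print(bgmm.weights_.round(2))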
@@ -75,7 +75,7 @@

# Choose cross-validation techniques for the inner and outer loops,
# independently of the dataset.
# E.g "LabelKFold", "LeaveOneOut", "LeaveOneLabelOut", etc.
# E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

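The updated comment names the group-aware splitters that replaced the Label* classes; a minimal standalone sketch with toy groups (not the setup of the example file):

import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(24).reshape(12, 2)
y = np.tile([0, 1], 6)
groups = np.repeat([0, 1, 2, 3], 3)  # e.g. one group per subject

# All samples of a group stay on one side of the split (formerly LabelKFold).
for train_idx, test_idx in GroupKFold(n_splits=4).split(X, y, groups=groups):
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])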
9 changes: 4 additions & 5 deletions sklearn/__init__.py
@@ -65,11 +65,10 @@
__check_build # avoid flakes unused variable error

__all__ = ['calibration', 'cluster', 'covariance', 'cross_decomposition',
'cross_validation', 'datasets', 'decomposition', 'dummy',
'ensemble', 'exceptions', 'externals', 'feature_extraction',
'feature_selection', 'gaussian_process', 'grid_search',
'isotonic', 'kernel_approximation', 'kernel_ridge',
'learning_curve', 'linear_model', 'manifold', 'metrics',
'datasets', 'decomposition', 'dummy', 'ensemble', 'exceptions',
'externals', 'feature_extraction', 'feature_selection',
'gaussian_process', 'isotonic', 'kernel_approximation',
'kernel_ridge', 'linear_model', 'manifold', 'metrics',
'mixture', 'model_selection', 'multiclass', 'multioutput',
'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
'preprocessing', 'random_projection', 'semi_supervised',
51 changes: 2 additions & 49 deletions sklearn/base.py
@@ -67,57 +67,10 @@ def clone(estimator, safe=True):
for name in new_object_params:
param1 = new_object_params[name]
param2 = params_set[name]
if param1 is param2:
# this should always happen
continue
if isinstance(param1, np.ndarray):
# For most ndarrays, we do not test for complete equality
if not isinstance(param2, type(param1)):
equality_test = False
elif (param1.ndim > 0
and param1.shape[0] > 0
and isinstance(param2, np.ndarray)
and param2.ndim > 0
and param2.shape[0] > 0):
equality_test = (
param1.shape == param2.shape
and param1.dtype == param2.dtype
and (_first_and_last_element(param1) ==
_first_and_last_element(param2))
)
else:
equality_test = np.all(param1 == param2)
elif sparse.issparse(param1):
# For sparse matrices equality doesn't work
if not sparse.issparse(param2):
equality_test = False
elif param1.size == 0 or param2.size == 0:
equality_test = (
param1.__class__ == param2.__class__
and param1.size == 0
and param2.size == 0
)
else:
equality_test = (
param1.__class__ == param2.__class__
and (_first_and_last_element(param1) ==
_first_and_last_element(param2))
and param1.nnz == param2.nnz
and param1.shape == param2.shape
)
else:
# fall back on standard equality
equality_test = param1 == param2
if equality_test:
warnings.warn("Estimator %s modifies parameters in __init__."
" This behavior is deprecated as of 0.18 and "
"support for this behavior will be removed in 0.20."
% type(estimator).__name__, DeprecationWarning)
else:
if param1 is not param2:
raise RuntimeError('Cannot clone object %s, as the constructor '
'does not seem to set parameter %s' %
'either does not set or modifies parameter %s' %
(estimator, name))

return new_object


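To make the behavioral change above concrete: an estimator whose __init__ stores a modified copy of a parameter used to emit a DeprecationWarning and, after this diff, fails clone outright. A hedged sketch with a hypothetical BadEstimator:

from sklearn.base import BaseEstimator, clone


class BadEstimator(BaseEstimator):
    """Hypothetical estimator that (incorrectly) copies a parameter in __init__."""

    def __init__(self, options=None):
        # Storing a new object instead of the passed-in one breaks clone's
        # identity check on the reconstructed estimator.
        self.options = dict(options or {})


try:
    clone(BadEstimator(options={'alpha': 1.0}))
except RuntimeError as exc:
    print(exc)  # "... either does not set or modifies parameter options"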
4 changes: 3 additions & 1 deletion sklearn/cluster/tests/test_hierarchical.py
@@ -51,6 +51,7 @@ def test_deprecation_of_n_components_in_linkage_tree():
assert_equal(n_leaves, n_leaves_t)
assert_equal(parent, parent_t)


def test_linkage_misc():
# Misc tests on linkage
rng = np.random.RandomState(42)
@@ -511,7 +512,8 @@ def test_connectivity_callable():
connectivity = kneighbors_graph(X, 3, include_self=False)
aglc1 = AgglomerativeClustering(connectivity=connectivity)
aglc2 = AgglomerativeClustering(
connectivity=partial(kneighbors_graph, n_neighbors=3, include_self=False))
connectivity=partial(kneighbors_graph, n_neighbors=3,
include_self=False))
aglc1.fit(X)
aglc2.fit(X)
assert_array_equal(aglc1.labels_, aglc2.labels_)
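The test touched above exercises a callable connectivity; as a hedged standalone sketch (toy blobs, illustrative neighbor count):

from functools import partial

from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.neighbors import kneighbors_graph

X, _ = make_blobs(n_samples=60, centers=3, random_state=0)

# A callable connectivity is evaluated on the data passed to fit, equivalent
# here to precomputing kneighbors_graph(X, 3, include_self=False).
model = AgglomerativeClustering(
    n_clusters=3,
    connectivity=partial(kneighbors_graph, n_neighbors=3, include_self=False))
print(model.fit(X).labels_[:10])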