Merged
4 changes: 2 additions & 2 deletions examples/applications/face_recognition.py
@@ -31,9 +31,9 @@
import logging
import matplotlib.pyplot as plt

from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
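The pattern in this hunk repeats across most files in the diff: `train_test_split` leaves `sklearn.cross_validation` and `GridSearchCV` leaves `sklearn.grid_search`, with both now imported from `sklearn.model_selection`. A minimal sketch of the new-style imports in use, assuming scikit-learn >= 0.18 and substituting the small iris dataset for the LFW faces used in the example:

```python
# New-style imports; everything comes from sklearn.model_selection.
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25, random_state=42)

param_grid = {'C': [1, 10, 100], 'gamma': [0.001, 0.01]}
clf = GridSearchCV(SVC(kernel='rbf'), param_grid)
clf.fit(X_train, y_train)
print(clf.best_params_, clf.score(X_test, y_test))
```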
2 changes: 1 addition & 1 deletion examples/calibration/plot_calibration.py
@@ -36,7 +36,7 @@
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import brier_score_loss
from sklearn.calibration import CalibratedClassifierCV
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split


n_samples = 50000
2 changes: 1 addition & 1 deletion examples/calibration/plot_calibration_curve.py
@@ -56,7 +56,7 @@
from sklearn.metrics import (brier_score_loss, precision_score, recall_score,
f1_score)
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split


# Create dataset of classification task with many redundant and few
2 changes: 1 addition & 1 deletion examples/classification/plot_classifier_comparison.py
@@ -31,7 +31,7 @@
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neighbors import KNeighborsClassifier
@@ -30,9 +30,9 @@
from sklearn.cluster import FeatureAgglomeration
from sklearn.linear_model import BayesianRidge
from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV
from sklearn.externals.joblib import Memory
from sklearn.cross_validation import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

###############################################################################
# Generate data
@@ -60,7 +60,7 @@

###############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(len(y), 2) # cross-validation generator for model selection
cv = KFold(2) # cross-validation generator for model selection
ridge = BayesianRidge()
cachedir = tempfile.mkdtemp()
mem = Memory(cachedir=cachedir, verbose=1)
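The substantive API change in this hunk is that `KFold` no longer takes the data (or its length) at construction; the bare splitter is handed to `GridSearchCV`, which calls its `split` method internally. A hedged sketch of that pattern on synthetic data (the `alpha_1` grid is a stand-in, not the example's real search space):

```python
# KFold is now built from the number of folds alone and passed as cv;
# the data reach the splitter only when GridSearchCV calls cv.split(X, y).
import numpy as np
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import GridSearchCV, KFold

rng = np.random.RandomState(0)
X = rng.randn(40, 5)
y = X[:, 0] + 0.1 * rng.randn(40)

cv = KFold(2)  # old API: KFold(len(y), 2)
search = GridSearchCV(BayesianRidge(), {'alpha_1': [1e-6, 1e-4]}, cv=cv)
search.fit(X, y)
print(search.best_params_)
```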
2 changes: 1 addition & 1 deletion examples/covariance/plot_covariance_estimation.py
@@ -49,7 +49,7 @@

from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
log_likelihood, empirical_covariance
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV


###############################################################################
4 changes: 2 additions & 2 deletions examples/decomposition/plot_pca_vs_fa_model_selection.py
@@ -35,8 +35,8 @@

from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.covariance import ShrunkCovariance, LedoitWolf
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

###############################################################################
# Create the data
8 changes: 4 additions & 4 deletions examples/ensemble/plot_gradient_boosting_oob.py
@@ -33,8 +33,8 @@
import matplotlib.pyplot as plt

from sklearn import ensemble
from sklearn.cross_validation import KFold
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split


# Generate data (adapted from G. Ridgeway's gbm example)
@@ -75,10 +75,10 @@ def heldout_score(clf, X_test, y_test):


def cv_estimate(n_folds=3):
cv = KFold(n=X_train.shape[0], n_folds=n_folds)
cv = KFold(n_folds=n_folds)
cv_clf = ensemble.GradientBoostingClassifier(**params)
val_scores = np.zeros((n_estimators,), dtype=np.float64)
for train, test in cv:
for train, test in cv.split(X_train, y_train):
cv_clf.fit(X_train[train], y_train[train])
val_scores += heldout_score(cv_clf, X_train[test], y_train[test])
val_scores /= n_folds
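The change to `cv_estimate` captures the core behavioural difference: the old `KFold` was built from the number of samples and iterated directly, while the new splitter is data-independent and yields index pairs from `split(X, y)`. A generic sketch of the changed loop (it shows only the iteration pattern, not the example's full OOB score computation):

```python
# New iteration pattern for KFold (the fold-count keyword is n_folds in this
# PR and n_splits in released scikit-learn, so it is passed positionally here).
import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)
y = np.arange(10)

cv = KFold(3)                        # old API: KFold(n=len(y), n_folds=3)
for train, test in cv.split(X, y):   # old API: for train, test in cv
    print(train.shape, test.shape)
```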
2 changes: 1 addition & 1 deletion examples/ensemble/plot_partial_dependence.py
@@ -51,7 +51,7 @@

from mpl_toolkits.mplot3d import Axes3D

from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble.partial_dependence import partial_dependence
16 changes: 10 additions & 6 deletions examples/exercises/plot_cv_diabetes.py
@@ -14,21 +14,25 @@
import numpy as np
import matplotlib.pyplot as plt

from sklearn import cross_validation, datasets, linear_model
from sklearn import datasets
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]

lasso = linear_model.Lasso()
lasso = Lasso()
alphas = np.logspace(-4, -.5, 30)

scores = list()
scores_std = list()

for alpha in alphas:
lasso.alpha = alpha
this_scores = cross_validation.cross_val_score(lasso, X, y, n_jobs=1)
this_scores = cross_val_score(lasso, X, y, n_jobs=1)
scores.append(np.mean(this_scores))
scores_std.append(np.std(this_scores))

@@ -51,15 +55,15 @@
# performs cross-validation on the training data it receives).
# We use external cross-validation to see how much the automatically obtained
# alphas differ across different cross-validation folds.
lasso_cv = linear_model.LassoCV(alphas=alphas)
k_fold = cross_validation.KFold(len(X), 3)
lasso_cv = LassoCV(alphas=alphas)
k_fold = KFold(3)

print("Answer to the bonus question:",
"how much can you trust the selection of alpha?")
print()
print("Alpha parameters maximising the generalization score on different")
print("subsets of the data:")
for k, (train, test) in enumerate(k_fold):
for k, (train, test) in enumerate(k_fold.split(X, y)):
lasso_cv.fit(X[train], y[train])
print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".
format(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])))
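The same `split()` pattern applies to the outer loop around `LassoCV`: the `KFold` is constructed without the data, and `enumerate` runs over `k_fold.split(X, y)`. A hedged sketch of that outer loop, using a shorter alpha grid than the example for brevity:

```python
# External 3-fold CV around LassoCV (which runs its own internal CV per fold).
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoCV
from sklearn.model_selection import KFold

diabetes = datasets.load_diabetes()
X, y = diabetes.data[:150], diabetes.target[:150]

lasso_cv = LassoCV(alphas=np.logspace(-4, -0.5, 10))
k_fold = KFold(3)  # old API: cross_validation.KFold(len(X), 3)

for k, (train, test) in enumerate(k_fold.split(X, y)):
    lasso_cv.fit(X[train], y[train])
    print(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test]))
```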
5 changes: 3 additions & 2 deletions examples/exercises/plot_cv_digits.py
@@ -12,7 +12,8 @@


import numpy as np
from sklearn import cross_validation, datasets, svm
from sklearn.model_selection import cross_val_score
from sklearn import datasets, svm

digits = datasets.load_digits()
X = digits.data
@@ -25,7 +26,7 @@
scores_std = list()
for C in C_s:
svc.C = C
this_scores = cross_validation.cross_val_score(svc, X, y, n_jobs=1)
this_scores = cross_val_score(svc, X, y, n_jobs=1)
scores.append(np.mean(this_scores))
scores_std.append(np.std(this_scores))

@@ -20,7 +20,8 @@
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold, permutation_test_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
from sklearn import datasets


@@ -39,7 +40,7 @@
X = np.c_[X, E]

svm = SVC(kernel='linear')
cv = StratifiedKFold(y, 2)
cv = StratifiedKFold(2)

score, permutation_scores, pvalue = permutation_test_score(
svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1)
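`StratifiedKFold` follows the same rule: the labels are no longer passed at construction, because `permutation_test_score` hands X and y to the splitter itself. A minimal hedged sketch on iris, with fewer permutations than the example for speed:

```python
# y moves out of the StratifiedKFold constructor; the consumer supplies it.
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold, permutation_test_score
from sklearn.svm import SVC

iris = datasets.load_iris()
X, y = iris.data, iris.target

cv = StratifiedKFold(2)  # old API: StratifiedKFold(y, 2)
score, perm_scores, pvalue = permutation_test_score(
    SVC(kernel='linear'), X, y, scoring='accuracy', cv=cv, n_permutations=30)
print(score, pvalue)
```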
4 changes: 2 additions & 2 deletions examples/feature_selection/plot_rfe_with_cross_validation.py
@@ -10,7 +10,7 @@

import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.datasets import make_classification

@@ -23,7 +23,7 @@
svc = SVC(kernel="linear")
# The "accuracy" scoring is proportional to the number of correct
# classifications
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 2),
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2),
scoring='accuracy')
rfecv.fit(X, y)

2 changes: 1 addition & 1 deletion examples/feature_stacker.py
@@ -20,7 +20,7 @@
# License: BSD 3 clause

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
9 changes: 5 additions & 4 deletions examples/gaussian_process/gp_diabetes_dataset.py
@@ -25,7 +25,8 @@

from sklearn import datasets
from sklearn.gaussian_process import GaussianProcess
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

# Load the dataset from scikit's data sets
diabetes = datasets.load_diabetes()
@@ -43,9 +44,9 @@
gp.theta0 = gp.theta_ # Given correlation parameter = MLE
gp.thetaL, gp.thetaU = None, None # None bounds deactivate MLE

# Perform a cross-validation estimate of the coefficient of determination using
# the cross_validation module using all CPUs available on the machine
# Perform a cross-validated estimate of the coefficient of determination
# using model_selection.cross_val_score with all CPUs available on the machine
K = 20 # folds
R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean()
R2 = cross_val_score(gp, X, y=y, cv=KFold(K), n_jobs=1).mean()
print("The %d-Folds estimate of the coefficient of determination is R2 = %s"
% (K, R2))
2 changes: 1 addition & 1 deletion examples/linear_model/plot_sgd_comparison.py
@@ -14,7 +14,7 @@
import matplotlib.pyplot as plt
from sklearn import datasets

from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier, Perceptron
from sklearn.linear_model import PassiveAggressiveClassifier

2 changes: 1 addition & 1 deletion examples/missing_values.py
@@ -28,7 +28,7 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.cross_validation import cross_val_score
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)

6 changes: 3 additions & 3 deletions examples/mixture/plot_gmm_classifier.py
@@ -33,7 +33,7 @@
import numpy as np

from sklearn import datasets
from sklearn.cross_validation import StratifiedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.externals.six.moves import xrange
from sklearn.mixture import GMM

@@ -55,9 +55,9 @@ def make_ellipses(gmm, ax):

# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, n_folds=4)
skf = StratifiedKFold(n_folds=4)
# Only take the first fold.
train_index, test_index = next(iter(skf))
train_index, test_index = next(iter(skf.split(iris.data, iris.target)))


X_train = iris.data[train_index]
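Taking only the first fold now means drawing one item from the generator returned by `split`, instead of iterating the splitter object. A sketch of that single-fold extraction on iris (the fold count is passed positionally because this PR names the keyword `n_folds` while released versions use `n_splits`):

```python
# Pull a single stratified train/test partition from the new splitter.
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold

iris = datasets.load_iris()
skf = StratifiedKFold(4)
train_index, test_index = next(iter(skf.split(iris.data, iris.target)))

X_train, y_train = iris.data[train_index], iris.target[train_index]
X_test, y_test = iris.data[test_index], iris.target[test_index]
print(len(X_train), len(X_test))
```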
3 changes: 1 addition & 2 deletions examples/model_selection/README.txt
@@ -3,5 +3,4 @@
Model Selection
-----------------------

Examples concerning model selection, mostly contained in the
:mod:`sklearn.grid_search` and :mod:`sklearn.cross_validation` modules.
Examples related to the :mod:`sklearn.model_selection` module.
4 changes: 2 additions & 2 deletions examples/model_selection/grid_search_digits.py
@@ -19,8 +19,8 @@
from __future__ import print_function

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC

@@ -1,3 +1,4 @@

"""
==========================================================
Sample pipeline for text feature extraction and evaluation
@@ -56,7 +57,7 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

print(__doc__)
2 changes: 1 addition & 1 deletion examples/model_selection/plot_confusion_matrix.py
@@ -30,7 +30,7 @@
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# import some data to play with
28 changes: 18 additions & 10 deletions examples/model_selection/plot_learning_curve.py
@@ -17,11 +17,11 @@

import numpy as np
import matplotlib.pyplot as plt
from sklearn import cross_validation
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.learning_curve import learning_curve
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit


def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
@@ -48,10 +48,20 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
ylim : tuple, shape (ymin, ymax), optional
Defines minimum and maximum yvalues plotted.

cv : integer, cross-validation generator, optional
If an integer is passed, it is the number of folds (defaults to 3).
Specific cross-validation objects can be passed, see
sklearn.cross_validation module for the list of possible objects
cv : int, cross-validation generator or an iterable, optional
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 3-fold cross-validation,
- integer, to specify the number of folds.
- An object to be used as a cross-validation generator.
- An iterable yielding train/test splits.

For integer/None inputs, if ``y`` is binary or multiclass,
:class:`StratifiedKFold` is used. If the estimator is not a classifier
or if ``y`` is neither binary nor multiclass, :class:`KFold` is used.

Refer :ref:`User Guide <cross_validation>` for the various
cross-validators that can be used here.

n_jobs : integer, optional
Number of jobs to run in parallel (default 1).
@@ -91,16 +101,14 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
title = "Learning Curves (Naive Bayes)"
# Cross validation with 100 iterations to get smoother mean test and train
# score curves, each time with 20% data randomly selected as a validation set.
cv = cross_validation.ShuffleSplit(digits.data.shape[0], n_iter=100,
test_size=0.2, random_state=0)
cv = ShuffleSplit(n_iter=100, test_size=0.2, random_state=0)

estimator = GaussianNB()
plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4)

title = "Learning Curves (SVM, RBF kernel, $\gamma=0.001$)"
# SVC is more expensive so we do a lower number of CV iterations:
cv = cross_validation.ShuffleSplit(digits.data.shape[0], n_iter=10,
test_size=0.2, random_state=0)
cv = ShuffleSplit(n_iter=10, test_size=0.2, random_state=0)
estimator = SVC(gamma=0.001)
plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4)
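`ShuffleSplit` likewise drops the data-size argument, and the resulting object is exactly the kind of cross-validation generator the updated `cv` docstring above allows. A hedged sketch with fewer iterations than the example (the repeat-count keyword is `n_iter` in this PR and `n_splits` in released scikit-learn, so it is passed positionally here):

```python
# A data-independent ShuffleSplit used as the cv argument of learning_curve.
from sklearn.datasets import load_digits
from sklearn.model_selection import ShuffleSplit, learning_curve
from sklearn.naive_bayes import GaussianNB

digits = load_digits()
X, y = digits.data, digits.target

cv = ShuffleSplit(10, test_size=0.2, random_state=0)  # 10 random 80/20 splits
train_sizes, train_scores, test_scores = learning_curve(
    GaussianNB(), X, y, cv=cv, n_jobs=1)
print(train_sizes)
```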

2 changes: 1 addition & 1 deletion examples/model_selection/plot_precision_recall.py
@@ -78,7 +78,7 @@
from sklearn import svm, datasets
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
