Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions sklearn/model_selection/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)):

@abstractmethod
def __init__(self, n_splits, shuffle, random_state):
if not isinstance(n_splits, numbers.Integral):
if not isinstance(n_splits, (numbers.Integral, np.integer)):
raise ValueError('The number of folds must be of Integral type. '
'%s of type %s was passed.'
% (n_splits, type(n_splits)))
Expand Down Expand Up @@ -1308,27 +1308,27 @@ def _validate_shuffle_split_init(test_size, train_size):
raise ValueError('test_size and train_size can not both be None')

if test_size is not None:
if np.asarray(test_size).dtype.kind == 'f':
if isinstance(test_size, (float, np.floating)):
if test_size >= 1.:
raise ValueError(
'test_size=%f should be smaller '
'than 1.0 or be an integer' % test_size)
elif np.asarray(test_size).dtype.kind != 'i':
elif not isinstance(test_size, (numbers.Integral, np.integer)):
# int values are checked during split based on the input
raise ValueError("Invalid value for test_size: %r" % test_size)

if train_size is not None:
if np.asarray(train_size).dtype.kind == 'f':
if isinstance(train_size, (float, np.floating)):
if train_size >= 1.:
raise ValueError("train_size=%f should be smaller "
"than 1.0 or be an integer" % train_size)
elif (np.asarray(test_size).dtype.kind == 'f' and
elif (isinstance(test_size, (float, np.floating)) and
(train_size + test_size) > 1.):
raise ValueError('The sum of test_size and train_size = %f, '
'should be smaller than 1.0. Reduce '
'test_size and/or train_size.' %
(train_size + test_size))
elif np.asarray(train_size).dtype.kind != 'i':
elif not isinstance(train_size, (numbers.Integral, np.integer)):
# int values are checked during split based on the input
raise ValueError("Invalid value for train_size: %r" % train_size)

Expand All @@ -1338,24 +1338,26 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
Validation helper to check if the test/test sizes are meaningful wrt to the
size of the data (n_samples)
"""
if (test_size is not None and np.asarray(test_size).dtype.kind == 'i' and
if (test_size is not None and
isinstance(test_size, (numbers.Integral, np.integer)) and
test_size >= n_samples):
raise ValueError('test_size=%d should be smaller than the number of '
'samples %d' % (test_size, n_samples))

if (train_size is not None and np.asarray(train_size).dtype.kind == 'i' and
if (train_size is not None and
isinstance(train_size, (numbers.Integral, np.integer)) and
train_size >= n_samples):
raise ValueError("train_size=%d should be smaller than the number of"
" samples %d" % (train_size, n_samples))

if np.asarray(test_size).dtype.kind == 'f':
if isinstance(test_size, (float, np.floating)):
n_test = ceil(test_size * n_samples)
elif np.asarray(test_size).dtype.kind == 'i':
elif isinstance(test_size, (numbers.Integral, np.integer)):
n_test = float(test_size)

if train_size is None:
n_train = n_samples - n_test
elif np.asarray(train_size).dtype.kind == 'f':
elif isinstance(train_size, (float, np.floating)):
n_train = floor(train_size * n_samples)
else:
n_train = float(train_size)
Expand Down Expand Up @@ -1552,7 +1554,7 @@ def check_cv(cv=3, y=None, classifier=False):
if cv is None:
cv = 3

if isinstance(cv, numbers.Integral):
if isinstance(cv, (numbers.Integral, np.integer)):
if (classifier and (y is not None) and
(type_of_target(y) in ('binary', 'multiclass'))):
return StratifiedKFold(cv)
Expand Down
7 changes: 6 additions & 1 deletion sklearn/model_selection/tests/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,15 +491,20 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372


def test_shuffle_split():
# Use numpy float as input
ss0 = ShuffleSplit(test_size=np.float16(0.2), random_state=0).split(X)
ss1 = ShuffleSplit(test_size=0.2, random_state=0).split(X)
ss2 = ShuffleSplit(test_size=2, random_state=0).split(X)
# Use numpy int as input
ss3 = ShuffleSplit(test_size=np.int32(2), random_state=0).split(X)
for typ in six.integer_types:
ss4 = ShuffleSplit(test_size=typ(2), random_state=0).split(X)
for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4):
for t0, t1, t2, t3, t4 in zip(ss0, ss1, ss2, ss3, ss4):
assert_array_equal(t0[0], t1[0])
assert_array_equal(t1[0], t2[0])
assert_array_equal(t2[0], t3[0])
assert_array_equal(t3[0], t4[0])
assert_array_equal(t0[1], t1[1])
assert_array_equal(t1[1], t2[1])
assert_array_equal(t2[1], t3[1])
assert_array_equal(t3[1], t4[1])
Expand Down