diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 3570d688ff70f..dcd26dbac0044 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -266,7 +266,7 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)): @abstractmethod def __init__(self, n_splits, shuffle, random_state): - if not isinstance(n_splits, numbers.Integral): + if not isinstance(n_splits, (numbers.Integral, np.integer)): raise ValueError('The number of folds must be of Integral type. ' '%s of type %s was passed.' % (n_splits, type(n_splits))) @@ -1308,27 +1308,27 @@ def _validate_shuffle_split_init(test_size, train_size): raise ValueError('test_size and train_size can not both be None') if test_size is not None: - if np.asarray(test_size).dtype.kind == 'f': + if isinstance(test_size, (float, np.floating)): if test_size >= 1.: raise ValueError( 'test_size=%f should be smaller ' 'than 1.0 or be an integer' % test_size) - elif np.asarray(test_size).dtype.kind != 'i': + elif not isinstance(test_size, (numbers.Integral, np.integer)): # int values are checked during split based on the input raise ValueError("Invalid value for test_size: %r" % test_size) if train_size is not None: - if np.asarray(train_size).dtype.kind == 'f': + if isinstance(train_size, (float, np.floating)): if train_size >= 1.: raise ValueError("train_size=%f should be smaller " "than 1.0 or be an integer" % train_size) - elif (np.asarray(test_size).dtype.kind == 'f' and + elif (isinstance(test_size, (float, np.floating)) and (train_size + test_size) > 1.): raise ValueError('The sum of test_size and train_size = %f, ' 'should be smaller than 1.0. Reduce ' 'test_size and/or train_size.' % (train_size + test_size)) - elif np.asarray(train_size).dtype.kind != 'i': + elif not isinstance(train_size, (numbers.Integral, np.integer)): # int values are checked during split based on the input raise ValueError("Invalid value for train_size: %r" % train_size) @@ -1338,24 +1338,26 @@ def _validate_shuffle_split(n_samples, test_size, train_size): Validation helper to check if the test/test sizes are meaningful wrt to the size of the data (n_samples) """ - if (test_size is not None and np.asarray(test_size).dtype.kind == 'i' and + if (test_size is not None and + isinstance(test_size, (numbers.Integral, np.integer)) and test_size >= n_samples): raise ValueError('test_size=%d should be smaller than the number of ' 'samples %d' % (test_size, n_samples)) - if (train_size is not None and np.asarray(train_size).dtype.kind == 'i' and + if (train_size is not None and + isinstance(train_size, (numbers.Integral, np.integer)) and train_size >= n_samples): raise ValueError("train_size=%d should be smaller than the number of" " samples %d" % (train_size, n_samples)) - if np.asarray(test_size).dtype.kind == 'f': + if isinstance(test_size, (float, np.floating)): n_test = ceil(test_size * n_samples) - elif np.asarray(test_size).dtype.kind == 'i': + elif isinstance(test_size, (numbers.Integral, np.integer)): n_test = float(test_size) if train_size is None: n_train = n_samples - n_test - elif np.asarray(train_size).dtype.kind == 'f': + elif isinstance(train_size, (float, np.floating)): n_train = floor(train_size * n_samples) else: n_train = float(train_size) @@ -1552,7 +1554,7 @@ def check_cv(cv=3, y=None, classifier=False): if cv is None: cv = 3 - if isinstance(cv, numbers.Integral): + if isinstance(cv, (numbers.Integral, np.integer)): if (classifier and (y is not None) and (type_of_target(y) in ('binary', 'multiclass'))): return StratifiedKFold(cv) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index ec4f07aef8e9a..7a657add65d25 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -491,15 +491,20 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 def test_shuffle_split(): + # Use numpy float as input + ss0 = ShuffleSplit(test_size=np.float16(0.2), random_state=0).split(X) ss1 = ShuffleSplit(test_size=0.2, random_state=0).split(X) ss2 = ShuffleSplit(test_size=2, random_state=0).split(X) + # Use numpy int as input ss3 = ShuffleSplit(test_size=np.int32(2), random_state=0).split(X) for typ in six.integer_types: ss4 = ShuffleSplit(test_size=typ(2), random_state=0).split(X) - for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4): + for t0, t1, t2, t3, t4 in zip(ss0, ss1, ss2, ss3, ss4): + assert_array_equal(t0[0], t1[0]) assert_array_equal(t1[0], t2[0]) assert_array_equal(t2[0], t3[0]) assert_array_equal(t3[0], t4[0]) + assert_array_equal(t0[1], t1[1]) assert_array_equal(t1[1], t2[1]) assert_array_equal(t2[1], t3[1]) assert_array_equal(t3[1], t4[1])