Closed
Description
Description
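The groups parameter passed to model_selection.cross_val_score() is not propagated to the RandomizedSearchCV.fit() call, so nested cross-validation with a group-based splitter (e.g. GroupKFold) as the inner cv fails. This is similar to #2879 and probably best addressed in #4497.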
Steps/Code to Reproduce
import numpy as np
from sklearn.utils.validation import indexable
from sklearn import linear_model
from sklearn import model_selection
# Generate data with a simple decision boundary: 2 labels and 2 groups per
# label (20 samples, 5 samples per group).
X = np.array(range(20)).reshape(-1, 1)
y = np.array([0] * 10 + [1] * 10)
groups = np.array([0] * 5 + [1] * 5 + [2] * 5 + [3] * 5)

# Run nested cross-validation: the same splitter drives both the inner
# randomized search and the outer cross_val_score loop.
# This works with StratifiedKFold but not with GroupKFold, because `groups`
# is handed to cross_val_score only and the inner search's fit() ends up
# splitting with groups=None (see the traceback under "Actual Results").
clf = linear_model.LogisticRegression()
# Working variant for comparison -- swap it in for the GroupKFold line below:
# cv = model_selection.StratifiedKFold(n_splits=2)
cv = model_selection.GroupKFold(n_splits=2)
param_dist = {'penalty': ['l1', 'l2'], 'C': np.logspace(-3, 3, 13)}
random_search = model_selection.RandomizedSearchCV(clf, cv=cv, param_distributions=param_dist, n_iter=20)
# Single-argument call form of print: same output on Python 2.7, and valid
# syntax on Python 3 (the bare `print expr` statement is not).
print(model_selection.cross_val_score(random_search, X, y=y, groups=groups, cv=cv))
Expected Results
When StratifiedKFold is used, the output is [ 0.8 0.7]. With GroupKFold the call should likewise succeed and return an array of 2 floats (one score per outer fold).
Actual Results
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 140, in cross_val_score
for train, test in cv.split(X, y, groups))
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
self.results = batch()
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 238, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 1185, in fit
return self._fit(X, y, groups, sampled_params)
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 562, in _fit
for parameters in parameter_iterable
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 603, in dispatch_one_batch
tasks = BatchedCalls(itertools.islice(iterator, batch_size))
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 127, in __init__
self.items = list(iterator_slice)
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 563, in <genexpr>
for train, test in cv.split(X, y, groups))
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_split.py", line 321, in split
for train, test in super(_BaseKFold, self).split(X, y, groups):
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_split.py", line 90, in split
for test_index in self._iter_test_masks(X, y, groups):
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_split.py", line 102, in _iter_test_masks
for test_index in self._iter_test_indices(X, y, groups):
File "/Users/davidslater/.virtualenvs/davidslater/lib/python2.7/site-packages/sklearn/model_selection/_split.py", line 474, in _iter_test_indices
raise ValueError("The groups parameter should not be None")
ValueError: The groups parameter should not be None
Versions
Darwin-15.6.0-x86_64-i386-64bit
('Python', '2.7.11 (default, Jan 22 2016, 08:29:18) \n[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)]')
('NumPy', '1.11.2')
('SciPy', '0.18.1')
('Scikit-Learn', '0.18')