scikit-learn · vighneshbirodkar · Aug 24, 2015 · Sep 4, 2015 · Sep 4, 2015 · ogrisel
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
@@ -170,7 +170,7 @@ Here is an example of building custom scorers, and of using the
     >>> #  and predictions defined below.
     >>> loss  = make_scorer(my_custom_loss_func, greater_is_better=False)
     >>> score = make_scorer(my_custom_loss_func, greater_is_better=True)
-    >>> ground_truth = [1, 1]
+    >>> ground_truth = [[1, 1]]
     >>> predictions  = [0, 1]
     >>> from sklearn.dummy import DummyClassifier
     >>> clf = DummyClassifier(strategy='most_frequent', random_state=0)

diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst
@@ -30,7 +30,7 @@ persistence model, namely `pickle <http://docs.python.org/library/pickle.html>`_
   >>> import pickle
   >>> s = pickle.dumps(clf)
   >>> clf2 = pickle.loads(s)
-  >>> clf2.predict(X[0])
+  >>> clf2.predict(X[0:1])
   array([0])
   >>> y[0]
   0

diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst
@@ -5,7 +5,7 @@ An introduction to machine learning with scikit-learn
 
 .. topic:: Section contents
 
-    In this section, we introduce the `machine learning 
+    In this section, we introduce the `machine learning
     <http://en.wikipedia.org/wiki/Machine_learning>`_
     vocabulary that we use throughout scikit-learn and give a
     simple learning example.
@@ -14,30 +14,30 @@ An introduction to machine learning with scikit-learn
 Machine learning: the problem setting
 -------------------------------------
 
-In general, a learning problem considers a set of n 
+In general, a learning problem considers a set of n
 `samples <http://en.wikipedia.org/wiki/Sample_(statistics)>`_ of
 data and then tries to predict properties of unknown data. If each sample is
 more than a single number and, for instance, a multi-dimensional entry
-(aka `multivariate <http://en.wikipedia.org/wiki/Multivariate_random_variable>`_ 
+(aka `multivariate <http://en.wikipedia.org/wiki/Multivariate_random_variable>`_
-data), is it said to have several attributes or **features**.
+data), it is said to have several attributes or **features**.
 
 We can separate learning problems in a few large categories:
 
- * `supervised learning <http://en.wikipedia.org/wiki/Supervised_learning>`_, 
-   in which the data comes with additional attributes that we want to predict 
+ * `supervised learning <http://en.wikipedia.org/wiki/Supervised_learning>`_,
+   in which the data comes with additional attributes that we want to predict
    (:ref:`Click here <supervised-learning>`
-   to go to the scikit-learn supervised learning page).This problem 
+   to go to the scikit-learn supervised learning page). This problem
    can be either:
 
-    * `classification 
+    * `classification
       <http://en.wikipedia.org/wiki/Classification_in_machine_learning>`_:
       samples belong to two or more classes and we
       want to learn from already labeled data how to predict the class
       of unlabeled data. An example of classification problem would
-      be the handwritten digit recognition example, in which the aim is 
+      be the handwritten digit recognition example, in which the aim is
       to assign each input vector to one of a finite number of discrete
-      categories.  Another way to think of classification is as a discrete 
-      (as opposed to continuous) form of supervised learning where one has a 
+      categories.  Another way to think of classification is as a discrete
+      (as opposed to continuous) form of supervised learning where one has a
       limited number of categories and for each of the n samples provided,
       one is to try to label them with the correct category or class.
 
@@ -48,15 +48,15 @@ We can separate learning problems in a few large categories:
       length of a salmon as a function of its age and weight.
 
  * `unsupervised learning <http://en.wikipedia.org/wiki/Unsupervised_learning>`_,
-   in which the training data consists of a set of input vectors x 
-   without any corresponding target values. The goal in such problems 
-   may be to discover groups of similar examples within the data, where 
-   it is called `clustering <http://en.wikipedia.org/wiki/Cluster_analysis>`_, 
-   or to determine the distribution of data within the input space, known as 
-   `density estimation <http://en.wikipedia.org/wiki/Density_estimation>`_, or 
-   to project the data from a high-dimensional space down to two or three 
-   dimensions for the purpose of *visualization* 
-   (:ref:`Click here <unsupervised-learning>` 
+   in which the training data consists of a set of input vectors x
+   without any corresponding target values. The goal in such problems
+   may be to discover groups of similar examples within the data, where
+   it is called `clustering <http://en.wikipedia.org/wiki/Cluster_analysis>`_,
+   or to determine the distribution of data within the input space, known as
+   `density estimation <http://en.wikipedia.org/wiki/Density_estimation>`_, or
+   to project the data from a high-dimensional space down to two or three
+   dimensions for the purpose of *visualization*
+   (:ref:`Click here <unsupervised-learning>`
    to go to the Scikit-Learn unsupervised learning page).
 
 .. topic:: Training set and testing set
@@ -143,7 +143,7 @@ Learning and predicting
 
 In the case of the digits dataset, the task is to predict, given an image,
 which digit it represents. We are given samples of each of the 10
-possible classes (the digits zero through nine) on which we *fit* an 
+possible classes (the digits zero through nine) on which we *fit* an
 `estimator <http://en.wikipedia.org/wiki/Estimator>`_ to be able to *predict*
 the classes to which unseen samples belong.
 
@@ -185,7 +185,7 @@ Now you can predict new values, in particular, we can ask to the
 classifier what is the digit of our last image in the ``digits`` dataset,
 which we have not used to train the classifier::
 
-  >>> clf.predict(digits.data[-1])
+  >>> clf.predict(digits.data[-1:])
   array([8])
 
 The corresponding image is the following:
@@ -223,7 +223,7 @@ persistence model, namely `pickle <http://docs.python.org/library/pickle.html>`_
   >>> import pickle
   >>> s = pickle.dumps(clf)
   >>> clf2 = pickle.loads(s)
-  >>> clf2.predict(X[0])
+  >>> clf2.predict(X[0:1])
   array([0])
   >>> y[0]
   0
@@ -235,10 +235,10 @@ and not to a string::
 
   >>> from sklearn.externals import joblib
   >>> joblib.dump(clf, 'filename.pkl') # doctest: +SKIP
-  
+
 Later you can load back the pickled model (possibly in another Python process)
 with::
-  
+
   >>> clf = joblib.load('filename.pkl') # doctest:+SKIP
 
 .. note::

diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py
@@ -713,7 +713,7 @@ def fit(self, X, y=None):
         -------
         self
         """
-        X = check_array(X)
+        X = check_array(X, ensure_min_samples=2)
         memory = self.memory
         if isinstance(memory, six.string_types):
             memory = Memory(cachedir=memory, verbose=0)
@@ -869,11 +869,8 @@ def fit(self, X, y=None, **params):
         -------
         self
         """
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
-        if not (len(X.shape) == 2 and X.shape[0] > 0):
-            raise ValueError('At least one sample is required to fit the '
-                             'model. A data matrix of shape %s was given.'
-                             % (X.shape, ))
+        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
+                        ensure_min_features=2)
         return AgglomerativeClustering.fit(self, X.T, **params)
 
     @property

diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
@@ -95,7 +95,8 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None):
 
     # Initialize list of closest distances and calculate current potential
     closest_dist_sq = euclidean_distances(
-        centers[0], X, Y_norm_squared=x_squared_norms, squared=True)
+        centers[0, np.newaxis], X, Y_norm_squared=x_squared_norms,
+        squared=True)
     current_pot = closest_dist_sq.sum()
 
     # Pick the remaining n_clusters-1 points

diff --git a/sklearn/covariance/empirical_covariance_.py b/sklearn/covariance/empirical_covariance_.py
@@ -70,6 +70,7 @@ def empirical_covariance(X, assume_centered=False):
     X = np.asarray(X)
     if X.ndim == 1:
         X = np.reshape(X, (1, -1))
+
     if X.shape[0] == 1:
         warnings.warn("Only one sample available. "
                       "You may want to reshape your data array")
@@ -79,6 +80,8 @@ def empirical_covariance(X, assume_centered=False):
     else:
         covariance = np.cov(X.T, bias=1)
 
+    if covariance.ndim == 0:
+        covariance = np.array([[covariance]])
     return covariance
 
 

diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
@@ -334,7 +334,10 @@ def __init__(self, alpha=.01, mode='cd', tol=1e-4, enet_tol=1e-4,
         self.store_precision = True
 
     def fit(self, X, y=None):
-        X = check_array(X)
+
+        # Covariance does not make sense for a single feature
+        X = check_array(X, ensure_min_features=2, ensure_min_samples=2)
+
         if self.assume_centered:
             self.location_ = np.zeros(X.shape[1])
         else:
@@ -557,7 +560,8 @@ def fit(self, X, y=None):
         X : ndarray, shape (n_samples, n_features)
             Data from which to compute the covariance estimate
         """
-        X = check_array(X)
+        # Covariance does not make sense for a single feature
+        X = check_array(X, ensure_min_features=2)
         if self.assume_centered:
             self.location_ = np.zeros(X.shape[1])
         else:

diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py
@@ -55,8 +55,8 @@ def test_covariance():
         cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)
 
     # test with one sample
-    # FIXME I don't know what this test does
-    X_1sample = np.arange(5)
+    # Create X with 1 sample and 5 features
+    X_1sample = np.arange(5).reshape(1, 5)
     cov = EmpiricalCovariance()
     assert_warns(UserWarning, cov.fit, X_1sample)
     assert_array_almost_equal(cov.covariance_,
@@ -172,8 +172,8 @@ def test_ledoit_wolf():
     assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
 
     # test with one sample
-    # FIXME I don't know what this test does
-    X_1sample = np.arange(5)
+    # warning should be raised when using only 1 sample
+    X_1sample = np.arange(5).reshape(1, 5)
     lw = LedoitWolf()
     assert_warns(UserWarning, lw.fit, X_1sample)
     assert_array_almost_equal(lw.covariance_,
@@ -220,7 +220,7 @@ def test_oas():
     assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
 
     # test with n_features = 1
-    X_1d = X[:, 0].reshape((-1, 1))
+    X_1d = X[:, 0:1]
     oa = OAS(assume_centered=True)
     oa.fit(X_1d)
     oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
@@ -259,8 +259,8 @@ def test_oas():
     assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
 
     # test with one sample
-    # FIXME I don't know what this test does
-    X_1sample = np.arange(5)
+    # warning should be raised when using only 1 sample
+    X_1sample = np.arange(5).reshape(1, 5)
     oa = OAS()
     assert_warns(UserWarning, oa.fit, X_1sample)
     assert_array_almost_equal(oa.covariance_,

diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
@@ -75,11 +75,11 @@ def test_dict_learning_nonzero_coefs():
     n_components = 4
     dico = DictionaryLearning(n_components, transform_algorithm='lars',
                               transform_n_nonzero_coefs=3, random_state=0)
-    code = dico.fit(X).transform(X[1])
+    code = dico.fit(X).transform(X[np.newaxis, 1])
     assert_true(len(np.flatnonzero(code)) == 3)
 
     dico.set_params(transform_algorithm='omp')
-    code = dico.transform(X[1])
+    code = dico.transform(X[np.newaxis, 1])
     assert_equal(len(np.flatnonzero(code)), 3)
 
 
@@ -173,7 +173,7 @@ def test_dict_learning_online_partial_fit():
                                         random_state=0)
     for i in range(10):
         for sample in X:
-            dict2.partial_fit(sample)
+            dict2.partial_fit(sample[np.newaxis, :])
 
     assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) ==
                            0))
@@ -225,4 +225,4 @@ def test_sparse_coder_estimator():
     code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars',
                        transform_alpha=0.001).transform(X)
     assert_true(not np.all(code == 0))
-    assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
+    assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
@@ -39,6 +39,7 @@
 
 from sklearn.tree.tree import SPARSE_SPLITTERS
 
+
 # toy sample
 X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
 y = [-1, -1, -1, 1, 1, 1]
@@ -724,6 +725,7 @@ def test_memory_layout():
         yield check_memory_layout, name, dtype
 
 
+@ignore_warnings
 def check_1d_input(name, X, X_2d, y):
     ForestEstimator = FOREST_ESTIMATORS[name]
     assert_raises(ValueError, ForestEstimator(random_state=0).fit, X, y)
@@ -735,8 +737,9 @@ def check_1d_input(name, X, X_2d, y):
         assert_raises(ValueError, est.predict, X)
 
 
+@ignore_warnings
 def test_1d_input():
-    X = iris.data[:, 0].ravel()
+    X = iris.data[:, 0]
     X_2d = iris.data[:, 0].reshape((-1, 1))
     y = iris.target
 

diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -311,7 +311,7 @@ def test_check_inputs_predict():
     x = np.array([1.0, 2.0])[:, np.newaxis]
     assert_raises(ValueError, clf.predict, x)
 
-    x = np.array([])
+    x = np.array([[]])
     assert_raises(ValueError, clf.predict, x)
 
     x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
@@ -323,7 +323,7 @@ def test_check_inputs_predict():
     x = np.array([1.0, 2.0])[:, np.newaxis]
     assert_raises(ValueError, clf.predict, x)
 
-    x = np.array([])
+    x = np.array([[]])
     assert_raises(ValueError, clf.predict, x)
 
     x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
@@ -492,9 +492,9 @@ def test_degenerate_targets():
 
     clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
     clf.fit(X, np.ones(len(X)))
-    clf.predict(rng.rand(2))
+    clf.predict([rng.rand(2)])
     assert_array_equal(np.ones((1,), dtype=np.float64),
-                       clf.predict(rng.rand(2)))
+                       clf.predict([rng.rand(2)]))
 
 
 def test_quantile_loss():

diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
@@ -103,7 +103,8 @@ def inverse_transform(self, X):
             # insert additional entries in indptr:
             # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
             # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
-            col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
+            it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))
+            col_nonzeros = it.ravel()
             indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
             Xt = csc_matrix((X.data, X.indices, indptr),
                             shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)

diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py
@@ -50,7 +50,7 @@ def test_transform_dense():
     assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype)
 
-    # Check 1d list and other dtype:
+    # Check a single-row 2d list and other dtype:
-    names_t_actual = sel.transform(feature_names)
+    names_t_actual = sel.transform([feature_names])
     assert_array_equal(feature_names_t, names_t_actual.ravel())
 
     # Check wrong shape raises error
@@ -85,7 +85,7 @@ def test_inverse_transform_dense():
                  sel.inverse_transform(Xt.astype(np.float32)).dtype)
 
-    # Check 1d list and other dtype:
+    # Check a single-row 2d list and other dtype:
-    names_inv_actual = sel.inverse_transform(feature_names_t)
+    names_inv_actual = sel.inverse_transform([feature_names_t])
     assert_array_equal(feature_names_inv, names_inv_actual.ravel())
 
     # Check wrong shape raises error

diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py
@@ -491,7 +491,7 @@ def test_tied_scores():
 
     for n_features in [1, 2, 3]:
         sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train)
-        X_test = sel.transform([0, 1, 2])
+        X_test = sel.transform([[0, 1, 2]])
         assert_array_equal(X_test[0], np.arange(3)[-n_features:])
 
 

diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py
@@ -17,7 +17,7 @@ def test_zero_variance():
         sel = VarianceThreshold().fit(X)
         assert_array_equal([0, 1, 3, 4], sel.get_support(indices=True))
 
-    assert_raises(ValueError, VarianceThreshold().fit, [0, 1, 2, 3])
+    assert_raises(ValueError, VarianceThreshold().fit, [[0, 1, 2, 3]])
     assert_raises(ValueError, VarianceThreshold().fit, [[0, 1], [0, 1]])
 
 

diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
@@ -219,8 +219,8 @@ def chi2(X, y):
 
     observed = safe_sparse_dot(Y.T, X)          # n_classes * n_features
 
-    feature_count = check_array(X.sum(axis=0))
-    class_prob = check_array(Y.mean(axis=0))
+    feature_count = X.sum(axis=0).reshape(1, -1)
+    class_prob = Y.mean(axis=0).reshape(1, -1)
     expected = np.dot(class_prob.T, feature_count)
 
     return _chisquare(observed, expected)