[RFC] deprecate 1d X in check_array [was reshape sensibly] #4511


Closed
wants to merge 10 commits into from
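Not part of the diff itself, but a minimal sketch (made-up data, with SGDClassifier purely as an illustrative estimator) of the calling convention the changes below enforce: once 1d X is deprecated in check_array, estimator methods expect X of shape (n_samples, n_features), so a single sample has to be passed as a row rather than as a bare 1d array.

```python
import numpy as np
from sklearn.linear_model import SGDClassifier

X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
y = np.array([0, 0, 1, 1])
clf = SGDClassifier(random_state=0).fit(X, y)

sample = X[0]                        # shape (2,): a bare 1d sample
clf.predict(sample.reshape(1, -1))   # explicit single-row 2d array, shape (1, 2)
clf.predict([sample])                # wrapping in a list also yields one row
```

Most of the library and test changes below are instances of exactly this pattern: `reshape(1, -1)`, `np.atleast_2d`, or wrapping a single sample in a list.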
2 changes: 1 addition & 1 deletion sklearn/cluster/k_means_.py
@@ -94,7 +94,7 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None):

# Initialize list of closest distances and calculate current potential
closest_dist_sq = euclidean_distances(
centers[0], X, Y_norm_squared=x_squared_norms, squared=True)
centers[0].reshape(1, -1), X, Y_norm_squared=x_squared_norms, squared=True)
current_pot = closest_dist_sq.sum()

# Pick the remaining n_clusters-1 points
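A hedged sketch (made-up data, not the PR's code) of why the `_k_init` call above reshapes `centers[0]`: `euclidean_distances` treats its first argument as a matrix of samples, so a single center of shape `(n_features,)` has to become a one-row matrix.

```python
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

X = np.random.RandomState(0).rand(5, 3)   # 5 samples, 3 features
center = X[0]                              # a single center, shape (3,)
d = euclidean_distances(center.reshape(1, -1), X, squared=True)
print(d.shape)                             # (1, 5): one distance per sample
```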
16 changes: 0 additions & 16 deletions sklearn/covariance/tests/test_covariance.py
@@ -171,14 +171,6 @@ def test_ledoit_wolf():
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

# test with one sample
# FIXME I don't know what this test does
X_1sample = np.arange(5)
lw = LedoitWolf()
assert_warns(UserWarning, lw.fit, X_1sample)
assert_array_almost_equal(lw.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))

# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
@@ -258,14 +250,6 @@ def test_oas():
assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

# test with one sample
# FIXME I don't know what this test does
X_1sample = np.arange(5)
oa = OAS()
assert_warns(UserWarning, oa.fit, X_1sample)
assert_array_almost_equal(oa.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))

# test shrinkage coeff on a simple data set (without saving precision)
oa = OAS(store_precision=False)
oa.fit(X)
6 changes: 3 additions & 3 deletions sklearn/decomposition/tests/test_dict_learning.py
@@ -51,11 +51,11 @@ def test_dict_learning_nonzero_coefs():
n_components = 4
dico = DictionaryLearning(n_components, transform_algorithm='lars',
transform_n_nonzero_coefs=3, random_state=0)
code = dico.fit(X).transform(X[1])
code = dico.fit(X).transform([X[1]])
assert_true(len(np.flatnonzero(code)) == 3)

dico.set_params(transform_algorithm='omp')
code = dico.transform(X[1])
code = dico.transform([X[1]])
assert_equal(len(np.flatnonzero(code)), 3)


@@ -149,7 +149,7 @@ def test_dict_learning_online_partial_fit():
random_state=0)
for i in range(10):
for sample in X:
dict2.partial_fit(sample)
dict2.partial_fit([sample])

assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) ==
0))
20 changes: 0 additions & 20 deletions sklearn/ensemble/tests/test_forest.py
@@ -725,26 +725,6 @@ def test_memory_layout():
yield check_memory_layout, name, dtype


def check_1d_input(name, X, X_2d, y):
ForestEstimator = FOREST_ESTIMATORS[name]
assert_raises(ValueError, ForestEstimator(random_state=0).fit, X, y)

est = ForestEstimator(random_state=0)
est.fit(X_2d, y)

if name in FOREST_CLASSIFIERS or name in FOREST_REGRESSORS:
assert_raises(ValueError, est.predict, X)


def test_1d_input():
X = iris.data[:, 0].ravel()
X_2d = iris.data[:, 0].reshape((-1, 1))
y = iris.target

for name in FOREST_ESTIMATORS:
yield check_1d_input, name, X, X_2d, y


def check_class_weights(name):
# Check class_weights resemble sample_weights behavior.
ForestClassifier = FOREST_CLASSIFIERS[name]
9 changes: 4 additions & 5 deletions sklearn/ensemble/tests/test_gradient_boosting.py
@@ -492,9 +492,9 @@ def test_degenerate_targets():

clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
clf.fit(X, np.ones(len(X)))
clf.predict(rng.rand(2))
clf.predict(rng.rand(1, 2))
assert_array_equal(np.ones((1,), dtype=np.float64),
clf.predict(rng.rand(2)))
clf.predict(rng.rand(1, 2)))


def test_quantile_loss():
@@ -989,15 +989,14 @@ def test_non_uniform_weights_toy_min_weight_leaf():
X = [[1, 0],
[1, 0],
[1, 0],
[0, 1],
]
[0, 1]]
y = [0, 0, 1, 0]
# ignore the first 2 training samples by setting their weight to 0
sample_weight = [0, 0, 1, 1]
gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1)
gb.fit(X, y, sample_weight=sample_weight)
assert_true(gb.predict([[1, 0]])[0] > 0.5)
assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2)
assert_almost_equal(gb.estimators_[0, 0].splitter.min_weight_leaf, 0.2)


def test_non_uniform_weights_toy_edge_case_clf():
3 changes: 2 additions & 1 deletion sklearn/feature_selection/base.py
@@ -103,7 +103,8 @@ def inverse_transform(self, X):
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
col_nonzeros = self.inverse_transform(
np.diff(X.indptr).reshape(1, -1)).ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
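For context, a small sketch (using an iris-fitted SelectKBest as a stand-in, not the PR's code) of the pattern used in `inverse_transform` above: the 1d vector of per-column nonzero counts is reshaped into a single row because `inverse_transform` expects input of shape `(n_samples, n_selected_features)`.

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2

iris = load_iris()
sel = SelectKBest(chi2, k=2).fit(iris.data, iris.target)

counts = np.array([3, 7])                                   # one value per selected feature
back = sel.inverse_transform(counts.reshape(1, -1)).ravel()
print(back)                                                 # zeros at the dropped features
```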
4 changes: 2 additions & 2 deletions sklearn/feature_selection/tests/test_base.py
@@ -50,7 +50,7 @@ def test_transform_dense():
assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype)

# Check 1d list and other dtype:
names_t_actual = sel.transform(feature_names)
names_t_actual = sel.transform([feature_names])
assert_array_equal(feature_names_t, names_t_actual.ravel())

# Check wrong shape raises error
@@ -85,7 +85,7 @@ def test_inverse_transform_dense():
sel.inverse_transform(Xt.astype(np.float32)).dtype)

# Check 1d list and other dtype:
names_inv_actual = sel.inverse_transform(feature_names_t)
names_inv_actual = sel.inverse_transform([feature_names_t])
assert_array_equal(feature_names_inv, names_inv_actual.ravel())

# Check wrong shape raises error
2 changes: 1 addition & 1 deletion sklearn/feature_selection/tests/test_feature_select.py
@@ -491,7 +491,7 @@ def test_tied_scores():

for n_features in [1, 2, 3]:
sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train)
X_test = sel.transform([0, 1, 2])
X_test = sel.transform([[0, 1, 2]])
assert_array_equal(X_test[0], np.arange(3)[-n_features:])


4 changes: 2 additions & 2 deletions sklearn/feature_selection/univariate_selection.py
@@ -213,8 +213,8 @@ def chi2(X, y):

observed = safe_sparse_dot(Y.T, X) # n_classes * n_features

feature_count = check_array(X.sum(axis=0))
class_prob = check_array(Y.mean(axis=0))
feature_count = X.sum(axis=0).reshape(1, -1)
class_prob = Y.mean(axis=0).reshape(1, -1)
expected = np.dot(class_prob.T, feature_count)

return _chisquare(observed, expected)
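The chi2 change above swaps `check_array` (which would warn or fail on these 1d reductions once 1d X is deprecated) for an explicit `reshape(1, -1)`. A sketch of the shapes involved, on made-up dense data:

```python
import numpy as np

X = np.array([[1., 0., 2.],
              [0., 1., 3.]])                    # 2 samples, 3 features
Y = np.array([[1., 0.],
              [0., 1.]])                        # one-hot class membership, 2 classes

feature_count = X.sum(axis=0).reshape(1, -1)    # (1, 3)
class_prob = Y.mean(axis=0).reshape(1, -1)      # (1, 2)
expected = np.dot(class_prob.T, feature_count)  # (2, 3), same shape as `observed`
print(expected.shape)
```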
30 changes: 15 additions & 15 deletions sklearn/gaussian_process/gaussian_process.py
@@ -769,9 +769,9 @@ def minus_reduced_likelihood_function(log10t):
# Initialize under isotropy assumption
if verbose:
print("Initialize under isotropy assumption...")
self.theta0 = check_array(self.theta0.min())
self.thetaL = check_array(self.thetaL.min())
self.thetaU = check_array(self.thetaU.max())
self.theta0 = np.atleast_2d(self.theta0.min())
self.thetaL = np.atleast_2d(self.thetaL.min())
self.thetaU = np.atleast_2d(self.thetaU.max())
theta_iso, optimal_rlf_value_iso, par_iso = \
self._arg_max_reduced_likelihood_function()
optimal_theta = theta_iso + np.zeros(theta0.shape)
@@ -782,16 +782,16 @@ def minus_reduced_likelihood_function(log10t):
for i in self.random_state.permutation(theta0.size):
if verbose:
print("Proceeding along dimension %d..." % (i + 1))
self.theta0 = check_array(theta_iso)
self.thetaL = check_array(thetaL[0, i])
self.thetaU = check_array(thetaU[0, i])
self.theta0 = np.atleast_2d(theta_iso)
self.thetaL = np.atleast_2d(thetaL[0, i])
self.thetaU = np.atleast_2d(thetaU[0, i])

def corr_cut(t, d):
return corr(check_array(np.hstack([optimal_theta[0][0:i],
t[0],
optimal_theta[0][(i +
1)::]])),
d)
return corr(
np.atleast_2d(
np.hstack([optimal_theta[0][0:i], t[0],
optimal_theta[0][(i + 1)::]])),
d)

self.corr = corr_cut
optimal_theta[0, i], optimal_rlf_value, optimal_par = \
@@ -824,7 +824,7 @@ def _check_params(self, n_samples=None):

# Check regression weights if given (Ordinary Kriging)
if self.beta0 is not None:
self.beta0 = check_array(self.beta0)
self.beta0 = np.atleast_2d(self.beta0)
if self.beta0.shape[1] != 1:
# Force to column vector
self.beta0 = self.beta0.T
@@ -844,12 +844,12 @@ def _check_params(self, n_samples=None):
"'light', %s was given." % self.storage_mode)

# Check correlation parameters
self.theta0 = check_array(self.theta0)
self.theta0 = np.atleast_2d(self.theta0)
lth = self.theta0.size

if self.thetaL is not None and self.thetaU is not None:
self.thetaL = check_array(self.thetaL)
self.thetaU = check_array(self.thetaU)
self.thetaL = np.atleast_2d(self.thetaL)
self.thetaU = np.atleast_2d(self.thetaU)
if self.thetaL.size != lth or self.thetaU.size != lth:
raise ValueError("theta0, thetaL and thetaU must have the "
"same length.")
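The gaussian_process changes use `np.atleast_2d` instead of `check_array` because `theta0`, `thetaL` and `thetaU` may legitimately arrive as scalars or 1d sequences; `atleast_2d` quietly promotes them to the `(1, n)` arrays the surrounding code indexes, with no deprecation machinery involved. A tiny sketch of that behaviour:

```python
import numpy as np

print(np.atleast_2d(0.1).shape)               # (1, 1)  -- isotropic scalar theta
print(np.atleast_2d([0.1, 1.0, 10.0]).shape)  # (1, 3)  -- anisotropic 1d theta
```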
39 changes: 20 additions & 19 deletions sklearn/linear_model/tests/test_sgd.py
@@ -257,6 +257,7 @@ def test_late_onset_averaging_reached(self):
class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest):
"""Test suite for the dense representation variant of SGD"""
factory_class = SGDClassifier

def test_sgd(self):
# Check that SGD gives any results :-)

@@ -390,7 +391,7 @@ def test_sgd_multiclass(self):
clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2)
assert_equal(clf.coef_.shape, (3, 2))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
pred = clf.predict(T2)
assert_array_equal(pred, true_result2)

@@ -432,7 +433,7 @@ def test_sgd_multiclass_njobs(self):
clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2)
assert_equal(clf.coef_.shape, (3, 2))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
pred = clf.predict(T2)
assert_array_equal(pred, true_result2)

@@ -469,14 +470,14 @@ def test_sgd_proba(self):
for loss in ["log", "modified_huber"]:
clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
clf.fit(X, Y)
p = clf.predict_proba([3, 2])
p = clf.predict_proba([[3, 2]])
assert_true(p[0, 1] > 0.5)
p = clf.predict_proba([-1, -1])
p = clf.predict_proba([[-1, -1]])
assert_true(p[0, 1] < 0.5)

p = clf.predict_log_proba([3, 2])
p = clf.predict_log_proba([[3, 2]])
assert_true(p[0, 1] > p[0, 0])
p = clf.predict_log_proba([-1, -1])
p = clf.predict_log_proba([[-1, -1]])
assert_true(p[0, 1] < p[0, 0])

# log loss multiclass probability estimates
@@ -488,25 +489,25 @@ def test_sgd_proba(self):
assert_almost_equal(p[0].sum(), 1)
assert_true(np.all(p[0] >= 0))

p = clf.predict_proba([-1, -1])
d = clf.decision_function([-1, -1])
p = clf.predict_proba([[-1, -1]])
d = clf.decision_function([[-1, -1]])
assert_array_equal(np.argsort(p[0]), np.argsort(d[0]))

l = clf.predict_log_proba([3, 2])
p = clf.predict_proba([3, 2])
l = clf.predict_log_proba([[3, 2]])
p = clf.predict_proba([[3, 2]])
assert_array_almost_equal(np.log(p), l)

l = clf.predict_log_proba([-1, -1])
p = clf.predict_proba([-1, -1])
l = clf.predict_log_proba([[-1, -1]])
p = clf.predict_proba([[-1, -1]])
assert_array_almost_equal(np.log(p), l)

# Modified Huber multiclass probability estimates; requires a separate
# test because the hard zero/one probabilities may destroy the
# ordering present in decision_function output.
clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
clf.fit(X2, Y2)
d = clf.decision_function([3, 2])
p = clf.predict_proba([3, 2])
d = clf.decision_function([[3, 2]])
p = clf.predict_proba([[3, 2]])
if not isinstance(self, SparseSGDClassifierTestCase):
assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1))
else: # XXX the sparse test gets a different X2 (?)
@@ -516,9 +517,9 @@ def test_sgd_proba(self):
# which would cause naive normalization to fail (see comment
# in SGDClassifier.predict_proba)
x = X.mean(axis=0)
d = clf.decision_function(x)
d = clf.decision_function([x])
if np.all(d < -1): # XXX not true in sparse test case (why?)
p = clf.predict_proba(x)
p = clf.predict_proba([x])
assert_array_almost_equal(p[0], [1 / 3.] * 3)

def test_sgd_l1(self):
@@ -703,7 +704,7 @@ def test_partial_fit_binary(self):
clf.partial_fit(X[:third], Y[:third], classes=classes)
assert_equal(clf.coef_.shape, (1, X.shape[1]))
assert_equal(clf.intercept_.shape, (1,))
assert_equal(clf.decision_function([0, 0]).shape, (1, ))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, ))
id1 = id(clf.coef_.data)

clf.partial_fit(X[third:], Y[third:])
@@ -722,7 +723,7 @@ def test_partial_fit_multiclass(self):
clf.partial_fit(X2[:third], Y2[:third], classes=classes)
assert_equal(clf.coef_.shape, (3, X2.shape[1]))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
id1 = id(clf.coef_.data)

clf.partial_fit(X2[third:], Y2[third:])
@@ -1017,7 +1018,7 @@ def test_partial_fit(self):
clf.partial_fit(X[:third], Y[:third])
assert_equal(clf.coef_.shape, (X.shape[1], ))
assert_equal(clf.intercept_.shape, (1,))
assert_equal(clf.decision_function([0, 0]).shape, (1, ))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, ))
id1 = id(clf.coef_.data)

clf.partial_fit(X[third:], Y[third:])
7 changes: 3 additions & 4 deletions sklearn/metrics/pairwise.py
@@ -192,7 +192,7 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
X, Y = check_pairwise_arrays(X, Y)

if Y_norm_squared is not None:
YY = check_array(Y_norm_squared)
YY = check_array(np.atleast_2d(Y_norm_squared))
if YY.shape != (1, Y.shape[0]):
raise ValueError(
"Incompatible dimensions for Y and Y_norm_squared")
@@ -602,8 +602,7 @@ def paired_cosine_distances(X, Y):
'l2': paired_euclidean_distances,
'l1': paired_manhattan_distances,
'manhattan': paired_manhattan_distances,
'cityblock': paired_manhattan_distances,
}
'cityblock': paired_manhattan_distances}


def paired_distances(X, Y, metric="euclidean", **kwds):
@@ -1089,7 +1088,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

"""
if (metric not in _VALID_METRICS and
not callable(metric) and metric != "precomputed"):
not callable(metric) and metric != "precomputed"):
raise ValueError("Unknown metric %s. "
"Valid metrics are %s, or 'precomputed', or a "
"callable" % (metric, _VALID_METRICS))
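A small sketch (made-up data) of the `Y_norm_squared` path touched above: callers often pass the precomputed squared norms as a flat vector of length `n_samples_Y`, and `np.atleast_2d` promotes it to the `(1, n_samples_Y)` row the shape check expects.

```python
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.RandomState(0)
X = rng.rand(2, 3)
Y = rng.rand(4, 3)

y_norms = (Y ** 2).sum(axis=1)   # 1d, shape (4,)
d = euclidean_distances(X, Y, Y_norm_squared=y_norms, squared=True)
print(d.shape)                    # (2, 4)
```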
4 changes: 2 additions & 2 deletions sklearn/naive_bayes.py
@@ -475,7 +475,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
# convert to float to support sample weight consistently
Y = Y.astype(np.float64)
if sample_weight is not None:
Y *= check_array(sample_weight).T
Y *= check_array(sample_weight)

class_prior = self.class_prior

@@ -524,7 +524,7 @@ def fit(self, X, y, sample_weight=None):
# this means we also don't have to cast X to floating point
Y = Y.astype(np.float64)
if sample_weight is not None:
Y *= check_array(sample_weight).T
Y *= check_array(sample_weight)

class_prior = self.class_prior
