From f760d27ac5b2e74c3805a61c51f08096de9bdc4a Mon Sep 17 00:00:00 2001
From: Vighnesh Birodkar
Date: Mon, 24 Aug 2015 19:52:40 -0400
Subject: [PATCH 1/3] This is the first step in deprecating 1d arrays

Passing 1D arrays to check_array without setting `ensure_2d` to False now
raises a deprecation warning before reshaping them. This will later throw
an error. All Scaler classes also throw warnings when 1D arrays are passed.

All unit tests/doctests are modified to ensure that no 1D arrays are passed,
except in explicit 1D array tests where the warnings have been silenced.
Additional tests are also included which check for different 1D array cases.
2D array tests with one sample and one feature are also added, and where
they failed, the `check_array` call has been modified to give a more useful
error message.
---
 doc/modules/model_evaluation.rst              |   2 +-
 doc/modules/model_persistence.rst             |   2 +-
 doc/tutorial/basic/tutorial.rst               |  48 +++----
 sklearn/cluster/hierarchical.py               |   9 +-
 sklearn/cluster/k_means_.py                   |   3 +-
 sklearn/covariance/empirical_covariance_.py   |   3 +
 sklearn/covariance/graph_lasso_.py            |   4 +-
 sklearn/covariance/tests/test_covariance.py   |  14 +-
 .../decomposition/tests/test_dict_learning.py |   8 +-
 sklearn/ensemble/tests/test_forest.py         |   5 +-
 .../ensemble/tests/test_gradient_boosting.py  |   8 +-
 sklearn/feature_selection/base.py             |   3 +-
 sklearn/feature_selection/tests/test_base.py  |   4 +-
 .../tests/test_feature_select.py              |   2 +-
 .../tests/test_variance_threshold.py          |   2 +-
 .../feature_selection/univariate_selection.py |   4 +-
 sklearn/gaussian_process/gaussian_process.py  |   8 +-
 sklearn/lda.py                                |   2 +-
 sklearn/linear_model/omp.py                   |   2 +-
 sklearn/linear_model/randomized_l1.py         |   3 +-
 sklearn/linear_model/tests/test_sgd.py        |  38 ++---
 sklearn/manifold/spectral_embedding_.py       |   3 +
 sklearn/metrics/pairwise.py                   |   9 +-
 sklearn/metrics/tests/test_pairwise.py        |  14 +-
 sklearn/mixture/gmm.py                        |   2 +-
 sklearn/naive_bayes.py                        |   6 +-
 sklearn/neighbors/approximate.py              |   6 +-
 sklearn/neighbors/base.py                     |   4 +-
 sklearn/neighbors/tests/test_approximate.py   |  19 +--
 sklearn/neighbors/tests/test_ball_tree.py     |   4 +-
 sklearn/neighbors/tests/test_kd_tree.py       |   4 +-
 sklearn/neighbors/tests/test_neighbors.py     |  13 +-
 sklearn/neighbors/unsupervised.py             |   2 +-
 sklearn/neural_network/tests/test_rbm.py      |   2 +-
 sklearn/preprocessing/data.py                 |  20 +++
 sklearn/preprocessing/tests/test_data.py      |   3 +
 sklearn/semi_supervised/label_propagation.py  |   8 +-
 sklearn/svm/tests/test_sparse.py              |   4 +-
 sklearn/svm/tests/test_svm.py                 |   5 +-
 sklearn/tests/test_common.py                  |   4 +-
 sklearn/tests/test_dummy.py                   |   4 +-
 sklearn/tests/test_naive_bayes.py             |  14 +-
 sklearn/tests/test_random_projection.py       |   2 +-
 sklearn/tree/tests/test_tree.py               |   7 +-
 sklearn/utils/__init__.py                     |   2 +-
 sklearn/utils/estimator_checks.py             | 132 +++++++++++++++++-
 sklearn/utils/extmath.py                      |   8 +-
 sklearn/utils/tests/test_validation.py        |  15 +-
 sklearn/utils/validation.py                   |  13 +-
 49 files changed, 349 insertions(+), 154 deletions(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 9b0556abb1e39..1f2d282499646 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -170,7 +170,7 @@ Here is an example of building custom scorers, and of using the
     >>> # and predictions defined below.
>>> loss = make_scorer(my_custom_loss_func, greater_is_better=False) >>> score = make_scorer(my_custom_loss_func, greater_is_better=True) - >>> ground_truth = [1, 1] + >>> ground_truth = [[1, 1]] >>> predictions = [0, 1] >>> from sklearn.dummy import DummyClassifier >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index e6c2f26b3bd19..403257fca0817 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -30,7 +30,7 @@ persistence model, namely `pickle `_ >>> import pickle >>> s = pickle.dumps(clf) >>> clf2 = pickle.loads(s) - >>> clf2.predict(X[0]) + >>> clf2.predict([X[0]]) array([0]) >>> y[0] 0 diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 21aa592ad7370..20511d8ca7a4c 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -5,7 +5,7 @@ An introduction to machine learning with scikit-learn .. topic:: Section contents - In this section, we introduce the `machine learning + In this section, we introduce the `machine learning `_ vocabulary that we use throughout scikit-learn and give a simple learning example. @@ -14,30 +14,30 @@ An introduction to machine learning with scikit-learn Machine learning: the problem setting ------------------------------------- -In general, a learning problem considers a set of n +In general, a learning problem considers a set of n `samples `_ of data and then tries to predict properties of unknown data. If each sample is more than a single number and, for instance, a multi-dimensional entry -(aka `multivariate `_ +(aka `multivariate `_ data), is it said to have several attributes or **features**. We can separate learning problems in a few large categories: - * `supervised learning `_, - in which the data comes with additional attributes that we want to predict + * `supervised learning `_, + in which the data comes with additional attributes that we want to predict (:ref:`Click here ` - to go to the scikit-learn supervised learning page).This problem + to go to the scikit-learn supervised learning page).This problem can be either: - * `classification + * `classification `_: samples belong to two or more classes and we want to learn from already labeled data how to predict the class of unlabeled data. An example of classification problem would - be the handwritten digit recognition example, in which the aim is + be the handwritten digit recognition example, in which the aim is to assign each input vector to one of a finite number of discrete - categories. Another way to think of classification is as a discrete - (as opposed to continuous) form of supervised learning where one has a + categories. Another way to think of classification is as a discrete + (as opposed to continuous) form of supervised learning where one has a limited number of categories and for each of the n samples provided, one is to try to label them with the correct category or class. @@ -48,15 +48,15 @@ We can separate learning problems in a few large categories: length of a salmon as a function of its age and weight. * `unsupervised learning `_, - in which the training data consists of a set of input vectors x - without any corresponding target values. 
The goal in such problems - may be to discover groups of similar examples within the data, where - it is called `clustering `_, - or to determine the distribution of data within the input space, known as - `density estimation `_, or - to project the data from a high-dimensional space down to two or three - dimensions for the purpose of *visualization* - (:ref:`Click here ` + in which the training data consists of a set of input vectors x + without any corresponding target values. The goal in such problems + may be to discover groups of similar examples within the data, where + it is called `clustering `_, + or to determine the distribution of data within the input space, known as + `density estimation `_, or + to project the data from a high-dimensional space down to two or three + dimensions for the purpose of *visualization* + (:ref:`Click here ` to go to the Scikit-Learn unsupervised learning page). .. topic:: Training set and testing set @@ -143,7 +143,7 @@ Learning and predicting In the case of the digits dataset, the task is to predict, given an image, which digit it represents. We are given samples of each of the 10 -possible classes (the digits zero through nine) on which we *fit* an +possible classes (the digits zero through nine) on which we *fit* an `estimator `_ to be able to *predict* the classes to which unseen samples belong. @@ -185,7 +185,7 @@ Now you can predict new values, in particular, we can ask to the classifier what is the digit of our last image in the ``digits`` dataset, which we have not used to train the classifier:: - >>> clf.predict(digits.data[-1]) + >>> clf.predict([digits.data[-1]]) array([8]) The corresponding image is the following: @@ -223,7 +223,7 @@ persistence model, namely `pickle `_ >>> import pickle >>> s = pickle.dumps(clf) >>> clf2 = pickle.loads(s) - >>> clf2.predict(X[0]) + >>> clf2.predict([X[0]]) array([0]) >>> y[0] 0 @@ -235,10 +235,10 @@ and not to a string:: >>> from sklearn.externals import joblib >>> joblib.dump(clf, 'filename.pkl') # doctest: +SKIP - + Later you can load back the pickled model (possibly in another Python process) with:: - + >>> clf = joblib.load('filename.pkl') # doctest:+SKIP .. note:: diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index e580d4e7ae2b2..8c75ef7c69602 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -713,7 +713,7 @@ def fit(self, X, y=None): ------- self """ - X = check_array(X) + X = check_array(X, ensure_min_samples=2) memory = self.memory if isinstance(memory, six.string_types): memory = Memory(cachedir=memory, verbose=0) @@ -869,11 +869,8 @@ def fit(self, X, y=None, **params): ------- self """ - X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) - if not (len(X.shape) == 2 and X.shape[0] > 0): - raise ValueError('At least one sample is required to fit the ' - 'model. A data matrix of shape %s was given.' 
- % (X.shape, )) + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], + ensure_min_features=2) return AgglomerativeClustering.fit(self, X.T, **params) @property diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index 7439f592a4274..082f6733192a4 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -95,7 +95,8 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None): # Initialize list of closest distances and calculate current potential closest_dist_sq = euclidean_distances( - centers[0], X, Y_norm_squared=x_squared_norms, squared=True) + centers[0, np.newaxis], X, Y_norm_squared=x_squared_norms, + squared=True) current_pot = closest_dist_sq.sum() # Pick the remaining n_clusters-1 points diff --git a/sklearn/covariance/empirical_covariance_.py b/sklearn/covariance/empirical_covariance_.py index f5f4cc10661aa..85b6ee657ea4d 100644 --- a/sklearn/covariance/empirical_covariance_.py +++ b/sklearn/covariance/empirical_covariance_.py @@ -70,6 +70,7 @@ def empirical_covariance(X, assume_centered=False): X = np.asarray(X) if X.ndim == 1: X = np.reshape(X, (1, -1)) + if X.shape[0] == 1: warnings.warn("Only one sample available. " "You may want to reshape your data array") @@ -79,6 +80,8 @@ def empirical_covariance(X, assume_centered=False): else: covariance = np.cov(X.T, bias=1) + if covariance.ndim == 0: + covariance = np.array([[covariance]]) return covariance diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py index fd94d360013cf..3df995f1bbe33 100644 --- a/sklearn/covariance/graph_lasso_.py +++ b/sklearn/covariance/graph_lasso_.py @@ -334,7 +334,7 @@ def __init__(self, alpha=.01, mode='cd', tol=1e-4, enet_tol=1e-4, self.store_precision = True def fit(self, X, y=None): - X = check_array(X) + X = check_array(X, ensure_min_features=2, ensure_min_samples=2) if self.assume_centered: self.location_ = np.zeros(X.shape[1]) else: @@ -557,7 +557,7 @@ def fit(self, X, y=None): X : ndarray, shape (n_samples, n_features) Data from which to compute the covariance estimate """ - X = check_array(X) + X = check_array(X, ensure_min_features=2) if self.assume_centered: self.location_ = np.zeros(X.shape[1]) else: diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index f34029b400144..79e6f1ae6ed43 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -55,8 +55,8 @@ def test_covariance(): cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0) # test with one sample - # FIXME I don't know what this test does - X_1sample = np.arange(5) + # Create X with 1 sample and 5 features + X_1sample = np.arange(5).reshape(1, 5) cov = EmpiricalCovariance() assert_warns(UserWarning, cov.fit, X_1sample) assert_array_almost_equal(cov.covariance_, @@ -172,8 +172,8 @@ def test_ledoit_wolf(): assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4) # test with one sample - # FIXME I don't know what this test does - X_1sample = np.arange(5) + # warning should be raised when using only 1 sample + X_1sample = np.arange(5).reshape(1, 5) lw = LedoitWolf() assert_warns(UserWarning, lw.fit, X_1sample) assert_array_almost_equal(lw.covariance_, @@ -220,7 +220,7 @@ def test_oas(): assert_array_almost_equal(scov.covariance_, oa.covariance_, 4) # test with n_features = 1 - X_1d = X[:, 0].reshape((-1, 1)) + X_1d = X[:, 0, np.newaxis] oa = OAS(assume_centered=True) oa.fit(X_1d) oa_cov_from_mle, 
oa_shinkrage_from_mle = oas(X_1d, assume_centered=True) @@ -259,8 +259,8 @@ def test_oas(): assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4) # test with one sample - # FIXME I don't know what this test does - X_1sample = np.arange(5) + # warning should be raised when using only 1 sample + X_1sample = np.arange(5).reshape(1, 5) oa = OAS() assert_warns(UserWarning, oa.fit, X_1sample) assert_array_almost_equal(oa.covariance_, diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index 42aa2cd9d5769..838f26650278c 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -75,11 +75,11 @@ def test_dict_learning_nonzero_coefs(): n_components = 4 dico = DictionaryLearning(n_components, transform_algorithm='lars', transform_n_nonzero_coefs=3, random_state=0) - code = dico.fit(X).transform(X[1]) + code = dico.fit(X).transform(X[np.newaxis, 1]) assert_true(len(np.flatnonzero(code)) == 3) dico.set_params(transform_algorithm='omp') - code = dico.transform(X[1]) + code = dico.transform(X[np.newaxis, 1]) assert_equal(len(np.flatnonzero(code)), 3) @@ -173,7 +173,7 @@ def test_dict_learning_online_partial_fit(): random_state=0) for i in range(10): for sample in X: - dict2.partial_fit(sample) + dict2.partial_fit(sample[np.newaxis, :]) assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)) @@ -225,4 +225,4 @@ def test_sparse_coder_estimator(): code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars', transform_alpha=0.001).transform(X) assert_true(not np.all(code == 0)) - assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) \ No newline at end of file + assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index e12f52d66d94a..2a14632929591 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -39,6 +39,7 @@ from sklearn.tree.tree import SPARSE_SPLITTERS + # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] @@ -724,6 +725,7 @@ def test_memory_layout(): yield check_memory_layout, name, dtype +@ignore_warnings def check_1d_input(name, X, X_2d, y): ForestEstimator = FOREST_ESTIMATORS[name] assert_raises(ValueError, ForestEstimator(random_state=0).fit, X, y) @@ -735,8 +737,9 @@ def check_1d_input(name, X, X_2d, y): assert_raises(ValueError, est.predict, X) +@ignore_warnings def test_1d_input(): - X = iris.data[:, 0].ravel() + X = iris.data[:, 0] X_2d = iris.data[:, 0].reshape((-1, 1)) y = iris.target diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index c1043e8da482f..29a1e4d3c33db 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -311,7 +311,7 @@ def test_check_inputs_predict(): x = np.array([1.0, 2.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x) - x = np.array([]) + x = np.array([[]]) assert_raises(ValueError, clf.predict, x) x = np.array([1.0, 2.0, 3.0])[:, np.newaxis] @@ -323,7 +323,7 @@ def test_check_inputs_predict(): x = np.array([1.0, 2.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x) - x = np.array([]) + x = np.array([[]]) assert_raises(ValueError, clf.predict, x) x = np.array([1.0, 2.0, 3.0])[:, np.newaxis] @@ -492,9 +492,9 @@ def test_degenerate_targets(): clf = 
GradientBoostingRegressor(n_estimators=100, random_state=1) clf.fit(X, np.ones(len(X))) - clf.predict(rng.rand(2)) + clf.predict([rng.rand(2)]) assert_array_equal(np.ones((1,), dtype=np.float64), - clf.predict(rng.rand(2))) + clf.predict([rng.rand(2)])) def test_quantile_loss(): diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 16f2d274ace62..e3ff0ed3bbebf 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -103,7 +103,8 @@ def inverse_transform(self, X): # insert additional entries in indptr: # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3] # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3] - col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel() + it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1)) + col_nonzeros = it.ravel() indptr = np.concatenate([[0], np.cumsum(col_nonzeros)]) Xt = csc_matrix((X.data, X.indices, indptr), shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype) diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index fe17fa0f75326..8374c61ca8ff6 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -50,7 +50,7 @@ def test_transform_dense(): assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype) # Check 1d list and other dtype: - names_t_actual = sel.transform(feature_names) + names_t_actual = sel.transform([feature_names]) assert_array_equal(feature_names_t, names_t_actual.ravel()) # Check wrong shape raises error @@ -85,7 +85,7 @@ def test_inverse_transform_dense(): sel.inverse_transform(Xt.astype(np.float32)).dtype) # Check 1d list and other dtype: - names_inv_actual = sel.inverse_transform(feature_names_t) + names_inv_actual = sel.inverse_transform([feature_names_t]) assert_array_equal(feature_names_inv, names_inv_actual.ravel()) # Check wrong shape raises error diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index adc289888fa1f..204d7c2e25dba 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -491,7 +491,7 @@ def test_tied_scores(): for n_features in [1, 2, 3]: sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train) - X_test = sel.transform([0, 1, 2]) + X_test = sel.transform([[0, 1, 2]]) assert_array_equal(X_test[0], np.arange(3)[-n_features:]) diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 87af92d7fce15..a40491302f350 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -17,7 +17,7 @@ def test_zero_variance(): sel = VarianceThreshold().fit(X) assert_array_equal([0, 1, 3, 4], sel.get_support(indices=True)) - assert_raises(ValueError, VarianceThreshold().fit, [0, 1, 2, 3]) + assert_raises(ValueError, VarianceThreshold().fit, [[0, 1, 2, 3]]) assert_raises(ValueError, VarianceThreshold().fit, [[0, 1], [0, 1]]) diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index 980f1a9daf56b..04bef2b785359 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -219,8 +219,8 @@ def chi2(X, y): observed = safe_sparse_dot(Y.T, X) # n_classes * n_features - feature_count = 
check_array(X.sum(axis=0)) - class_prob = check_array(Y.mean(axis=0)) + feature_count = X.sum(axis=0).reshape(1, -1) + class_prob = Y.mean(axis=0).reshape(1, -1) expected = np.dot(class_prob.T, feature_count) return _chisquare(observed, expected) diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py index 07f688bc57f61..3d4d97da5e2b8 100644 --- a/sklearn/gaussian_process/gaussian_process.py +++ b/sklearn/gaussian_process/gaussian_process.py @@ -825,7 +825,7 @@ def _check_params(self, n_samples=None): # Check regression weights if given (Ordinary Kriging) if self.beta0 is not None: - self.beta0 = check_array(self.beta0) + self.beta0 = np.atleast_2d(self.beta0) if self.beta0.shape[1] != 1: # Force to column vector self.beta0 = self.beta0.T @@ -845,12 +845,12 @@ def _check_params(self, n_samples=None): "'light', %s was given." % self.storage_mode) # Check correlation parameters - self.theta0 = check_array(self.theta0) + self.theta0 = np.atleast_2d(self.theta0) lth = self.theta0.size if self.thetaL is not None and self.thetaU is not None: - self.thetaL = check_array(self.thetaL) - self.thetaU = check_array(self.thetaU) + self.thetaL = np.atleast_2d(self.thetaL) + self.thetaU = np.atleast_2d(self.thetaU) if self.thetaL.size != lth or self.thetaU.size != lth: raise ValueError("theta0, thetaL and thetaU must have the " "same length.") diff --git a/sklearn/lda.py b/sklearn/lda.py index 47d3f4221adaf..c16a8fcce0f6f 100644 --- a/sklearn/lda.py +++ b/sklearn/lda.py @@ -414,7 +414,7 @@ def fit(self, X, y, store_covariance=False, tol=1.0e-4): " 0.16 and will be removed from fit() in 0.18", DeprecationWarning) self.tol = tol - X, y = check_X_y(X, y) + X, y = check_X_y(X, y, ensure_min_samples=2) self.classes_ = unique_labels(y) if self.priors is None: # estimate priors from sample diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py index 19435cdacc735..40ce91e56612c 100644 --- a/sklearn/linear_model/omp.py +++ b/sklearn/linear_model/omp.py @@ -823,7 +823,7 @@ def fit(self, X, y): self : object returns an instance of self. """ - X, y = check_X_y(X, y, y_numeric=True) + X, y = check_X_y(X, y, y_numeric=True, ensure_min_features=2) X = as_float_array(X, copy=False, force_all_finite=False) cv = check_cv(self.cv, X, y, classifier=False) max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1]) diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index 235761152046e..652f0375215cc 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -88,7 +88,8 @@ def fit(self, X, y): self : object Returns an instance of self. 
""" - X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True) + X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True, + ensure_min_samples=2) X = as_float_array(X, copy=False) n_samples, n_features = X.shape diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index cdf254708e2c4..0b00ec46bcd24 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -391,7 +391,7 @@ def test_sgd_multiclass(self): clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, 3)) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -433,7 +433,7 @@ def test_sgd_multiclass_njobs(self): clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, 3)) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -470,14 +470,14 @@ def test_sgd_proba(self): for loss in ["log", "modified_huber"]: clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10) clf.fit(X, Y) - p = clf.predict_proba([3, 2]) + p = clf.predict_proba([[3, 2]]) assert_true(p[0, 1] > 0.5) - p = clf.predict_proba([-1, -1]) + p = clf.predict_proba([[-1, -1]]) assert_true(p[0, 1] < 0.5) - p = clf.predict_log_proba([3, 2]) + p = clf.predict_log_proba([[3, 2]]) assert_true(p[0, 1] > p[0, 0]) - p = clf.predict_log_proba([-1, -1]) + p = clf.predict_log_proba([[-1, -1]]) assert_true(p[0, 1] < p[0, 0]) # log loss multiclass probability estimates @@ -489,16 +489,16 @@ def test_sgd_proba(self): assert_almost_equal(p[0].sum(), 1) assert_true(np.all(p[0] >= 0)) - p = clf.predict_proba([-1, -1]) - d = clf.decision_function([-1, -1]) + p = clf.predict_proba([[-1, -1]]) + d = clf.decision_function([[-1, -1]]) assert_array_equal(np.argsort(p[0]), np.argsort(d[0])) - l = clf.predict_log_proba([3, 2]) - p = clf.predict_proba([3, 2]) + l = clf.predict_log_proba([[3, 2]]) + p = clf.predict_proba([[3, 2]]) assert_array_almost_equal(np.log(p), l) - l = clf.predict_log_proba([-1, -1]) - p = clf.predict_proba([-1, -1]) + l = clf.predict_log_proba([[-1, -1]]) + p = clf.predict_proba([[-1, -1]]) assert_array_almost_equal(np.log(p), l) # Modified Huber multiclass probability estimates; requires a separate @@ -506,8 +506,8 @@ def test_sgd_proba(self): # ordering present in decision_function output. clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10) clf.fit(X2, Y2) - d = clf.decision_function([3, 2]) - p = clf.predict_proba([3, 2]) + d = clf.decision_function([[3, 2]]) + p = clf.predict_proba([[3, 2]]) if not isinstance(self, SparseSGDClassifierTestCase): assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1)) else: # XXX the sparse test gets a different X2 (?) @@ -517,9 +517,9 @@ def test_sgd_proba(self): # which would cause naive normalization to fail (see comment # in SGDClassifier.predict_proba) x = X.mean(axis=0) - d = clf.decision_function(x) + d = clf.decision_function([x]) if np.all(d < -1): # XXX not true in sparse test case (why?) - p = clf.predict_proba(x) + p = clf.predict_proba([x]) assert_array_almost_equal(p[0], [1 / 3.] 
* 3) def test_sgd_l1(self): @@ -705,7 +705,7 @@ def test_partial_fit_binary(self): clf.partial_fit(X[:third], Y[:third], classes=classes) assert_equal(clf.coef_.shape, (1, X.shape[1])) assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, )) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, )) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) @@ -724,7 +724,7 @@ def test_partial_fit_multiclass(self): clf.partial_fit(X2[:third], Y2[:third], classes=classes) assert_equal(clf.coef_.shape, (3, X2.shape[1])) assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, 3)) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) id1 = id(clf.coef_.data) clf.partial_fit(X2[third:], Y2[third:]) @@ -1019,7 +1019,7 @@ def test_partial_fit(self): clf.partial_fit(X[:third], Y[:third]) assert_equal(clf.coef_.shape, (X.shape[1], )) assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, )) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, )) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 9edcaa97a168f..c67064b5bb288 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -451,6 +451,9 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ + + X = check_array(X, ensure_min_samples=2) + random_state = check_random_state(self.random_state) if isinstance(self.affinity, six.string_types): if self.affinity not in set(("nearest_neighbors", "rbf", diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index a5b2709a00ddb..ce31444169377 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -220,7 +220,8 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False, if X is Y: # shortcut in the common case euclidean_distances(X, X) YY = XX.T elif Y_norm_squared is not None: - YY = check_array(Y_norm_squared) + YY = np.atleast_2d(Y_norm_squared) + if YY.shape != (1, Y.shape[0]): raise ValueError( "Incompatible dimensions for Y and Y_norm_squared") @@ -486,11 +487,11 @@ def manhattan_distances(X, Y=None, sum_over_features=True, Examples -------- >>> from sklearn.metrics.pairwise import manhattan_distances - >>> manhattan_distances(3, 3)#doctest:+ELLIPSIS + >>> manhattan_distances([[3]], [[3]])#doctest:+ELLIPSIS array([[ 0.]]) - >>> manhattan_distances(3, 2)#doctest:+ELLIPSIS + >>> manhattan_distances([[3]], [[2]])#doctest:+ELLIPSIS array([[ 1.]]) - >>> manhattan_distances(2, 3)#doctest:+ELLIPSIS + >>> manhattan_distances([[2]], [[3]])#doctest:+ELLIPSIS array([[ 1.]]) >>> manhattan_distances([[1, 2], [3, 4]],\ [[1, 2], [0, 3]])#doctest:+ELLIPSIS diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 328d13e971ce7..5a23610b2ba23 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -366,8 +366,8 @@ def test_euclidean_distances(): rng = np.random.RandomState(0) X = rng.random_sample((10, 4)) Y = rng.random_sample((20, 4)) - X_norm_sq = (X ** 2).sum(axis=1) - Y_norm_sq = (Y ** 2).sum(axis=1) + X_norm_sq = (X ** 2).sum(axis=1).reshape(1, -1) + Y_norm_sq = (Y ** 2).sum(axis=1).reshape(1, -1) # check that we still get the right answers with {X,Y}_norm_squared D1 = euclidean_distances(X, Y) @@ -573,11 +573,13 @@ def test_check_different_dimensions(): 
def test_check_invalid_dimensions(): # Ensure an error is raised on 1D input arrays. - XA = np.arange(45) - XB = np.resize(np.arange(32), (4, 8)) + # The modified tests are not 1D. In the old test, the array was internally + # converted to 2D anyways + XA = np.arange(45).reshape(9, 5) + XB = np.arange(32).reshape(4, 8) assert_raises(ValueError, check_pairwise_arrays, XA, XB) - XA = np.resize(np.arange(45), (5, 9)) - XB = np.arange(32) + XA = np.arange(45).reshape(9, 5) + XB = np.arange(32).reshape(4, 8) assert_raises(ValueError, check_pairwise_arrays, XA, XB) diff --git a/sklearn/mixture/gmm.py b/sklearn/mixture/gmm.py index bb2a3fed630e5..42c9aadb93403 100644 --- a/sklearn/mixture/gmm.py +++ b/sklearn/mixture/gmm.py @@ -462,7 +462,7 @@ def _fit(self, X, y=None, do_prediction=False): """ # initialization step - X = check_array(X, dtype=np.float64) + X = check_array(X, dtype=np.float64, ensure_min_samples=2) if X.shape[0] < self.n_components: raise ValueError( 'GMM estimation with %s components, but got only %s samples' % diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index f80524028258b..057d0916e5c42 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -478,6 +478,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): # We convert it to np.float64 to support sample_weight consistently Y = Y.astype(np.float64) if sample_weight is not None: + sample_weight = np.atleast_2d(sample_weight) Y *= check_array(sample_weight).T class_prior = self.class_prior @@ -528,6 +529,7 @@ def fit(self, X, y, sample_weight=None): # this means we also don't have to cast X to floating point Y = Y.astype(np.float64) if sample_weight is not None: + sample_weight = np.atleast_2d(sample_weight) Y *= check_array(sample_weight).T class_prior = self.class_prior @@ -617,7 +619,7 @@ class MultinomialNB(BaseDiscreteNB): >>> clf = MultinomialNB() >>> clf.fit(X, y) MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True) - >>> print(clf.predict(X[2])) + >>> print(clf.predict([X[2]])) [3] Notes @@ -715,7 +717,7 @@ class BernoulliNB(BaseDiscreteNB): >>> clf = BernoulliNB() >>> clf.fit(X, Y) BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True) - >>> print(clf.predict(X[2])) + >>> print(clf.predict([X[2]])) [3] References diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index 81d9474acce2d..8960d03c79a35 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -435,7 +435,8 @@ def kneighbors(self, X, n_neighbors=None, return_distance=True): neighbors, distances = [], [] bin_queries, max_depth = self._query(X) for i in range(X.shape[0]): - neighs, dists = self._get_candidates(X[i], max_depth[i], + + neighs, dists = self._get_candidates(X[[i]], max_depth[i], bin_queries[i], n_neighbors) neighbors.append(neighs) @@ -494,7 +495,8 @@ def radius_neighbors(self, X, radius=None, return_distance=True): neighbors, distances = [], [] bin_queries, max_depth = self._query(X) for i in range(X.shape[0]): - neighs, dists = self._get_radius_neighbors(X[i], max_depth[i], + + neighs, dists = self._get_radius_neighbors(X[[i]], max_depth[i], bin_queries[i], radius) neighbors.append(neighs) distances.append(dists) diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 17b147bbb4025..4a311894ef81f 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -323,7 +323,7 @@ class from an array representing our data set and ask who's >>> neigh = NearestNeighbors(n_neighbors=1) >>> 
neigh.fit(samples) # doctest: +ELLIPSIS NearestNeighbors(algorithm='auto', leaf_size=30, ...) - >>> print(neigh.kneighbors([1., 1., 1.])) # doctest: +ELLIPSIS + >>> print(neigh.kneighbors([[1., 1., 1.]])) # doctest: +ELLIPSIS (array([[ 0.5]]), array([[2]]...)) As you can see, it returns [[0.5]], and [[2]], which means that the @@ -564,7 +564,7 @@ class from an array representing our data set and ask who's >>> neigh = NearestNeighbors(radius=1.6) >>> neigh.fit(samples) # doctest: +ELLIPSIS NearestNeighbors(algorithm='auto', leaf_size=30, ...) - >>> rng = neigh.radius_neighbors([1., 1., 1.]) + >>> rng = neigh.radius_neighbors([[1., 1., 1.]]) >>> print(np.asarray(rng[0][0])) # doctest: +ELLIPSIS [ 1.5 0.5] >>> print(np.asarray(rng[1][0])) # doctest: +ELLIPSIS diff --git a/sklearn/neighbors/tests/test_approximate.py b/sklearn/neighbors/tests/test_approximate.py index 50794e263c831..7e32fa130a130 100644 --- a/sklearn/neighbors/tests/test_approximate.py +++ b/sklearn/neighbors/tests/test_approximate.py @@ -41,7 +41,8 @@ def test_neighbors_accuracy_with_n_candidates(): lshf = LSHForest(n_candidates=n_candidates) lshf.fit(X) for j in range(n_iter): - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) + neighbors = lshf.kneighbors(query, n_neighbors=n_points, return_distance=False) distances = pairwise_distances(query, X, metric='cosine') @@ -75,7 +76,7 @@ def test_neighbors_accuracy_with_n_estimators(): lshf = LSHForest(n_candidates=500, n_estimators=t) lshf.fit(X) for j in range(n_iter): - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) neighbors = lshf.kneighbors(query, n_neighbors=n_points, return_distance=False) distances = pairwise_distances(query, X, metric='cosine') @@ -114,7 +115,7 @@ def test_kneighbors(): for i in range(n_iter): n_neighbors = rng.randint(0, n_samples) - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) neighbors = lshf.kneighbors(query, n_neighbors=n_neighbors, return_distance=False) # Desired number of neighbors should be returned. @@ -133,7 +134,7 @@ def test_kneighbors(): return_distance=False) assert_equal(neighbors.shape[0], n_queries) # Test random point(not in the data set) - query = rng.randn(n_features) + query = rng.randn(n_features).reshape(1, -1) lshf.kneighbors(query, n_neighbors=1, return_distance=False) # Test n_neighbors at initialization @@ -165,7 +166,7 @@ def test_radius_neighbors(): for i in range(n_iter): # Select a random point in the dataset as the query - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) # At least one neighbor should be returned when the radius is the # mean distance from the query to the points of the dataset. @@ -197,7 +198,7 @@ def test_radius_neighbors(): assert_equal(neighbors.dtype, object) # Compare with exact neighbor search - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) mean_dist = np.mean(pairwise_distances(query, X, metric='cosine')) nbrs = NearestNeighbors(algorithm='brute', metric='cosine').fit(X) @@ -230,7 +231,7 @@ def test_radius_neighbors_boundary_handling(): lsfh = LSHForest(min_hash_match=0, n_candidates=n_points).fit(X) # define a query aligned with the first axis - query = [1., 0.] 
+ query = [[1., 0.]] # Compute the exact cosine distances of the query to the four points of # the dataset @@ -289,7 +290,7 @@ def test_distances(): for i in range(n_iter): n_neighbors = rng.randint(0, n_samples) - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) distances, neighbors = lshf.kneighbors(query, n_neighbors=n_neighbors, return_distance=True) @@ -400,7 +401,7 @@ def test_candidates(): # requested number of neighbors. X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1], [6, 10, 2]], dtype=np.float32) - X_test = np.array([7, 10, 3], dtype=np.float32) + X_test = np.array([7, 10, 3], dtype=np.float32).reshape(1, -1) # For zero candidates lshf = LSHForest(min_hash_match=32) diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index 76cc8999c7c40..0f5968cd63897 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -107,7 +107,7 @@ def test_ball_tree_query_radius(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind = bt.query_radius(query_pt, r + eps)[0] + ind = bt.query_radius([query_pt], r + eps)[0] i = np.where(rad <= r + eps)[0] ind.sort() @@ -126,7 +126,7 @@ def test_ball_tree_query_radius_distance(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind, dist = bt.query_radius(query_pt, r + eps, return_distance=True) + ind, dist = bt.query_radius([query_pt], r + eps, return_distance=True) ind = ind[0] dist = dist[0] diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index 4c2433ff538de..50ece8f97a271 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -58,7 +58,7 @@ def test_kd_tree_query_radius(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind = kdt.query_radius(query_pt, r + eps)[0] + ind = kdt.query_radius([query_pt], r + eps)[0] i = np.where(rad <= r + eps)[0] ind.sort() @@ -77,7 +77,7 @@ def test_kd_tree_query_radius_distance(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind, dist = kdt.query_radius(query_pt, r + eps, return_distance=True) + ind, dist = kdt.query_radius([query_pt], r + eps, return_distance=True) ind = ind[0] dist = dist[0] diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 12373b19f6166..3a4b787f76a3d 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -436,7 +436,7 @@ def test_radius_neighbors_boundary_handling(): for algorithm in ALGORITHMS: nbrs = neighbors.NearestNeighbors(radius=radius, algorithm=algorithm).fit(X) - results = nbrs.radius_neighbors([0.0], return_distance=False) + results = nbrs.radius_neighbors([[0.0]], return_distance=False) assert_equal(results.shape, (1,)) assert_equal(results.dtype, object) assert_array_equal(results[0], [0, 1]) @@ -901,7 +901,7 @@ def test_neighbors_badargs(): nbrs.fit(X, y) assert_raises(ValueError, nbrs.predict, - []) + [[]]) if (isinstance(cls, neighbors.KNeighborsClassifier) or isinstance(cls, neighbors.KNeighborsRegressor)): nbrs = cls(n_neighbors=-1) @@ -916,6 +916,15 @@ def test_neighbors_badargs(): nbrs.radius_neighbors_graph, X, mode='blah') + nbrs = 
neighbors.NearestNeighbors().fit(X)
+
+    assert_raises(ValueError,
+                  nbrs.kneighbors_graph,
+                  X, mode='blah')
+    assert_raises(ValueError,
+                  nbrs.radius_neighbors_graph,
+                  X, mode='blah')
+
 
 def test_neighbors_metrics(n_samples=20, n_features=3,
                            n_query_pts=2, n_neighbors=5):
diff --git a/sklearn/neighbors/unsupervised.py b/sklearn/neighbors/unsupervised.py
index d458a6533a90e..5d7de4a12e7cf 100644
--- a/sklearn/neighbors/unsupervised.py
+++ b/sklearn/neighbors/unsupervised.py
@@ -93,7 +93,7 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin,
     ...                                          #doctest: +ELLIPSIS
     array([[2, 0]]...)
 
-    >>> rng = neigh.radius_neighbors([0, 0, 1.3], 0.4, return_distance=False)
+    >>> rng = neigh.radius_neighbors([[0, 0, 1.3]], 0.4, return_distance=False)
     >>> np.asarray(rng[0][0])
     array(2)
 
diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py
index a735954fcb5be..bf171b7fd2555 100644
--- a/sklearn/neural_network/tests/test_rbm.py
+++ b/sklearn/neural_network/tests/test_rbm.py
@@ -161,7 +161,7 @@ def test_score_samples():
     # Test numerical stability (#2785): would previously generate infinities
     # and crash with an exception.
     with np.errstate(under='ignore'):
-        rbm1.score_samples(np.arange(1000) * 100)
+        rbm1.score_samples([np.arange(1000) * 100])
 
 
 def test_rbm_verbose():
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index cd528c07e240f..a78d9831e59e1 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -46,6 +46,10 @@
     'minmax_scale',
 ]
 
+DEPRECATION_MSG_1D = ("Passing 1d arrays as data is deprecated and will be "
+                      "removed in 0.18. Reshape your data either using "
+                      "X.reshape(-1, 1) if your data has a single feature "
+                      "or X.reshape(1, -1) if it contains a single sample.")
 
 def _mean_and_std(X, axis=0, with_mean=True, with_std=True):
     """Compute mean and std deviation for centering, scaling.
@@ -270,6 +274,10 @@ def transform(self, X): check_is_fitted(self, 'scale_') X = check_array(X, copy=self.copy, ensure_2d=False) + + if X.ndim == 1: + warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning) + X *= self.scale_ X += self.min_ return X @@ -439,6 +447,10 @@ def transform(self, X, y=None, copy=None): X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False, warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) + + if X.ndim == 1: + warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning) + if sparse.issparse(X): if self.with_mean: raise ValueError( @@ -544,6 +556,10 @@ def transform(self, X, y=None): check_is_fitted(self, 'scale_') X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES) + + if X.ndim == 1: + warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning) + if sparse.issparse(X): if X.shape[0] == 1: inplace_row_scale(X, 1.0 / self.scale_) @@ -672,6 +688,10 @@ def _check_array(self, X, copy): """Makes sure centering is not enabled for sparse matrices.""" X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES) + + if X.ndim == 1: + warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning) + if sparse.issparse(X): if self.with_centering: raise ValueError( diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index b6cb4bcc0e840..5308bd18eae2a 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -16,6 +16,7 @@ from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_no_warnings +from sklearn.utils.testing import ignore_warnings from sklearn.utils.sparsefuncs import mean_variance_axis from sklearn.preprocessing.data import _transform_selected @@ -77,6 +78,7 @@ def test_polynomial_features(): assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]]) +@ignore_warnings def test_scaler_1d(): # Test scaling of dataset along single axis rng = np.random.RandomState(0) @@ -275,6 +277,7 @@ def test_minmax_scale_axis1(): assert_array_almost_equal(np.max(X_trans, axis=1), 1) +@ignore_warnings def test_min_max_scaler_1d(): # Test scaling of dataset along single axis rng = np.random.RandomState(0) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 855f6c0857959..d963ad2428135 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -61,7 +61,7 @@ from ..metrics.pairwise import rbf_kernel from ..utils.graph import graph_laplacian from ..utils.extmath import safe_sparse_dot -from ..utils.validation import check_X_y, check_is_fitted +from ..utils.validation import check_X_y, check_is_fitted, check_array from ..externals import six from ..neighbors.unsupervised import NearestNeighbors @@ -173,10 +173,8 @@ class labels """ check_is_fitted(self, 'X_') - if sparse.isspmatrix(X): - X_2d = X - else: - X_2d = np.atleast_2d(X) + X_2d = check_array(X, accept_sparse = ['csc', 'csr', 'coo', 'dok', + 'bsr', 'lil', 'dia']) weight_matrices = self._get_kernel(self.X_, X_2d) if self.kernel == 'knn': probabilities = [] diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index ad3ec25859f87..07b145d6175d9 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -246,11 +246,11 @@ def test_sample_weights(): # Test weights on individual 
samples clf = svm.SVC() clf.fit(X_sp, Y) - assert_array_equal(clf.predict(X[2]), [1.]) + assert_array_equal(clf.predict([X[2]]), [1.]) sample_weight = [.1] * 3 + [10] * 3 clf.fit(X_sp, Y, sample_weight=sample_weight) - assert_array_equal(clf.predict(X[2]), [2.]) + assert_array_equal(clf.predict([X[2]]), [2.]) def test_sparse_liblinear_intercept_handling(): diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 10213e990e210..c16009e70d723 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -86,6 +86,7 @@ def test_libsvm_iris(): assert_array_equal(pred, pred2) +@ignore_warnings def test_single_sample_1d(): # Test whether SVCs work on a single sample given as a 1-d array @@ -399,11 +400,11 @@ def test_sample_weights(): # TODO: check on NuSVR, OneClass, etc. clf = svm.SVC() clf.fit(X, Y) - assert_array_equal(clf.predict(X[2]), [1.]) + assert_array_equal(clf.predict([X[2]]), [1.]) sample_weight = [.1] * 3 + [10] * 3 clf.fit(X, Y, sample_weight=sample_weight) - assert_array_equal(clf.predict(X[2]), [2.]) + assert_array_equal(clf.predict([X[2]]), [2.]) # test that rescaling all samples is the same as changing C clf = svm.SVC() diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index c129f10c885e8..4b4337edcc102 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -30,7 +30,9 @@ check_class_weight_balanced_linear_classifier, check_transformer_n_iter, check_non_transformer_estimators_n_iter, - check_get_params_invariance) + check_get_params_invariance, + check_fit2d_predict1d, + check_fit1d_1sample) def test_all_estimator_no_base_class(): diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index b3555da25b012..4f2dc86bc52bb 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -94,10 +94,10 @@ def test_most_frequent_and_prior_strategy(): _check_predict_proba(clf, X, y) if strategy == "prior": - assert_array_equal(clf.predict_proba(X[0]), + assert_array_equal(clf.predict_proba([X[0]]), clf.class_prior_.reshape((1, -1))) else: - assert_array_equal(clf.predict_proba(X[0]), + assert_array_equal(clf.predict_proba([X[0]]), clf.class_prior_.reshape((1, -1)) > 0.5) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 0e180b461b01a..3af99fc903bd7 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -253,8 +253,8 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict(X[-1]), 2) - assert_equal(clf.predict_proba(X[0]).shape, (1, 2)) + assert_equal(clf.predict([X[-1]]), 2) + assert_equal(clf.predict_proba([X[0]]).shape, (1, 2)) assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1), np.array([1., 1.]), 6) @@ -263,10 +263,10 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict_proba(X[0]).shape, (1, 3)) + assert_equal(clf.predict_proba([X[0]]).shape, (1, 3)) assert_equal(clf.predict_proba(X[:2]).shape, (2, 3)) - assert_almost_equal(np.sum(clf.predict_proba(X[1])), 1) - assert_almost_equal(np.sum(clf.predict_proba(X[-1])), 1) + assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1) + assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1) assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1) assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1) @@ 
-351,7 +351,7 @@ def test_sample_weight_mnb(): clf.fit([[1, 2], [1, 2], [1, 0]], [0, 0, 1], sample_weight=[1, 1, 4]) - assert_array_equal(clf.predict([1, 0]), [1]) + assert_array_equal(clf.predict([[1, 0]]), [1]) positive_prior = np.exp(clf.intercept_[0]) assert_array_almost_equal([1 - positive_prior, positive_prior], [1 / 3., 2 / 3.]) @@ -459,7 +459,7 @@ def test_bnb(): # Testing data point is: # Chinese Chinese Chinese Tokyo Japan - X_test = np.array([0, 1, 1, 0, 0, 1]) + X_test = np.array([[0, 1, 1, 0, 0, 1]]) # Check the predictive probabilities are correct unnorm_predict_proba = np.array([[0.005183999999999999, diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index e64fed90ece16..527faede75f31 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -211,7 +211,7 @@ def test_sparse_random_projection_transformer_invalid_density(): def test_random_projection_transformer_invalid_input(): for RandomProjection in all_RandomProjection: assert_raises(ValueError, - RandomProjection(n_components='auto').fit, [0, 1, 2]) + RandomProjection(n_components='auto').fit, [[0, 1, 2]]) assert_raises(ValueError, RandomProjection(n_components=-10).fit, data) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 3cdae723e873f..2e15cce420796 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -29,6 +29,7 @@ from sklearn.utils.testing import raises from sklearn.utils.validation import check_random_state from sklearn.utils.validation import NotFittedError +from sklearn.utils.testing import ignore_warnings from sklearn.tree import DecisionTreeClassifier from sklearn.tree import DecisionTreeRegressor @@ -497,7 +498,7 @@ def test_error(): assert_raises(NotFittedError, est.predict_proba, X) est.fit(X, y) - X2 = [-2, -1, 1] # wrong feature shape for sample + X2 = [[-2, -1, 1]] # wrong feature shape for sample assert_raises(ValueError, est.predict_proba, X2) for name, TreeEstimator in ALL_TREES.items(): @@ -1228,6 +1229,7 @@ def test_explicit_sparse_zeros(): yield (check_explicit_sparse_zeros, tree) +@ignore_warnings def check_raise_error_on_1d_input(name): TreeEstimator = ALL_TREES[name] @@ -1239,9 +1241,10 @@ def check_raise_error_on_1d_input(name): est = TreeEstimator(random_state=0) est.fit(X_2d, y) - assert_raises(ValueError, est.predict, X) + assert_raises(ValueError, est.predict, [X]) +@ignore_warnings def test_1d_input(): for name in ALL_TREES: yield check_raise_error_on_1d_input, name diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index bcb1773399b5d..398446404a003 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -351,7 +351,7 @@ def safe_sqr(X, copy=True): ------- X ** 2 : element wise square """ - X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], ensure_2d=False) if issparse(X): if copy: X = X.copy() diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index a1d1c0351503a..2ff07f13dae40 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -28,6 +28,8 @@ from sklearn.utils.testing import assert_greater from sklearn.utils.testing import SkipTest from sklearn.utils.testing import ignore_warnings +from sklearn.utils.testing import assert_warns + from sklearn.base import (clone, ClassifierMixin, RegressorMixin, TransformerMixin, ClusterMixin, BaseEstimator) @@ -169,6 +171,11 @@ def 
_yield_all_checks(name, Estimator): if issubclass(Estimator, ClusterMixin): for check in _yield_clustering_checks(name, Estimator): yield check + yield check_fit2d_predict1d + yield check_fit2d_1sample + yield check_fit2d_1feature + yield check_fit1d_1feature + yield check_fit1d_1sample def check_estimator(Estimator): @@ -331,6 +338,124 @@ def check_dtype_object(name, Estimator): assert_raises_regex(TypeError, msg, estimator.fit, X, y) +@ignore_warnings +def check_fit2d_predict1d(name, Estimator): + # check by fitting a 2d array and prediting with a 1d array + rnd = np.random.RandomState(0) + X = 3 * rnd.uniform(size=(20, 3)) + y = X[:, 0].astype(np.int) + y = multioutput_estimator_convert_y_2d(name, y) + estimator = Estimator() + set_fast_parameters(estimator) + + if hasattr(estimator, "n_components"): + estimator.n_components = 1 + if hasattr(estimator, "n_clusters"): + estimator.n_clusters = 1 + + set_random_state(estimator, 1) + estimator.fit(X, y) + + for method in ["predict", "transform", "decision_function", + "predict_proba"]: + if hasattr(estimator, method): + try: + assert_warns(DeprecationWarning, + getattr(estimator, method), X[0]) + except ValueError: + pass + + +@ignore_warnings +def check_fit2d_1sample(name, Estimator): + # check by fitting a 2d array and prediting with a 1d array + rnd = np.random.RandomState(0) + X = 3 * rnd.uniform(size=(1, 10)) + y = X[:, 0].astype(np.int) + y = multioutput_estimator_convert_y_2d(name, y) + estimator = Estimator() + set_fast_parameters(estimator) + + if hasattr(estimator, "n_components"): + estimator.n_components = 1 + if hasattr(estimator, "n_clusters"): + estimator.n_clusters = 1 + + set_random_state(estimator, 1) + try: + estimator.fit(X, y) + except ValueError: + pass + + +@ignore_warnings +def check_fit2d_1feature(name, Estimator): + # check by fitting a 2d array and prediting with a 1d array + rnd = np.random.RandomState(0) + X = 3 * rnd.uniform(size=(10, 1)) + y = X[:, 0].astype(np.int) + y = multioutput_estimator_convert_y_2d(name, y) + estimator = Estimator() + set_fast_parameters(estimator) + + if hasattr(estimator, "n_components"): + estimator.n_components = 1 + if hasattr(estimator, "n_clusters"): + estimator.n_clusters = 1 + + set_random_state(estimator, 1) + try: + estimator.fit(X, y) + except ValueError: + pass + + +@ignore_warnings +def check_fit1d_1feature(name, Estimator): + # check fitting 1d array with 1 feature + rnd = np.random.RandomState(0) + X = 3 * rnd.uniform(size=(20)) + y = X.astype(np.int) + y = multioutput_estimator_convert_y_2d(name, y) + estimator = Estimator() + set_fast_parameters(estimator) + + if hasattr(estimator, "n_components"): + estimator.n_components = 1 + if hasattr(estimator, "n_clusters"): + estimator.n_clusters = 1 + + set_random_state(estimator, 1) + + try: + estimator.fit(X, y) + except ValueError: + pass + + +@ignore_warnings +def check_fit1d_1sample(name, Estimator): + # check fitting 1d array with 1 feature + rnd = np.random.RandomState(0) + X = 3 * rnd.uniform(size=(20)) + y = np.array([1]) + y = multioutput_estimator_convert_y_2d(name, y) + estimator = Estimator() + set_fast_parameters(estimator) + + if hasattr(estimator, "n_components"): + estimator.n_components = 1 + if hasattr(estimator, "n_clusters"): + estimator.n_clusters = 1 + + set_random_state(estimator, 1) + + try: + estimator.fit(X, y) + except ValueError : + pass + + def check_transformer_general(name, Transformer): X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, 
cluster_std=0.1) @@ -518,8 +643,8 @@ def check_estimators_empty_data_messages(name, Estimator): # the following y should be accepted by both classifiers and regressors # and ignored by unsupervised models y = multioutput_estimator_convert_y_2d(name, np.array([1, 0, 1])) - msg = "0 feature(s) (shape=(3, 0)) while a minimum of 1 is required." - assert_raise_message(ValueError, msg, e.fit, X_zero_features, y) + msg = "0 feature\(s\) \(shape=\(3, 0\)\) while a minimum of \d* is required." + assert_raises_regex(ValueError, msg, e.fit, X_zero_features, y) def check_estimators_nan_inf(name, Estimator): @@ -950,7 +1075,8 @@ def check_regressors_int(name, Regressor): def check_regressors_train(name, Regressor): X, y = _boston_subset() - y = StandardScaler().fit_transform(y) # X is already scaled + y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled + y = y.ravel() y = multioutput_estimator_convert_y_2d(name, y) rnd = np.random.RandomState(0) # catch deprecation warnings diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 48fd151103bd4..d583841536534 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -582,15 +582,15 @@ def log_logistic(X, out=None): Parameters ---------- - X: array-like, shape (M, N) + X: array-like, shape (M, N) or (M, ) Argument to the logistic function - out: array-like, shape: (M, N), optional: + out: array-like, shape: (M, N) or (M, ), optional: Preallocated output array. Returns ------- - out: array, shape (M, N) + out: array, shape (M, N) or (M, ) Log of the logistic function evaluated at every point in x Notes @@ -599,8 +599,10 @@ def log_logistic(X, out=None): http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/ """ is_1d = X.ndim == 1 + X = np.atleast_2d(X) X = check_array(X, dtype=np.float) + n_samples, n_features = X.shape if out is None: diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 216825a4c1976..75b594acee630 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -14,6 +14,7 @@ from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_warns +from sklearn.utils.testing import ignore_warnings from sklearn.utils import as_float_array, check_array, check_symmetric from sklearn.utils import check_X_y from sklearn.utils.mocking import MockDataFrame @@ -83,13 +84,13 @@ def test_memmap(): asflt = lambda x: as_float_array(x, copy=False) with NamedTemporaryFile(prefix='sklearn-test') as tmp: - M = np.memmap(tmp, shape=100, dtype=np.float32) + M = np.memmap(tmp, shape=(10, 10), dtype=np.float32) M[:] = 0 for f in (check_array, np.asarray, asflt): X = f(M) X[:] = 1 - assert_array_equal(X.ravel(), M) + assert_array_equal(X.ravel(), M.ravel()) X[:] = 0 @@ -112,6 +113,7 @@ def test_ordering(): assert_false(X.data.flags['C_CONTIGUOUS']) +@ignore_warnings def test_check_array(): # accept_sparse == None # raise error on sparse inputs @@ -119,6 +121,8 @@ def test_check_array(): X_csr = sp.csr_matrix(X) assert_raises(TypeError, check_array, X_csr) # ensure_2d + # This might not be needed + assert_warns(DeprecationWarning, check_array, [0, 1, 2]) X_array = check_array([0, 1, 2]) assert_equal(X_array.ndim, 2) X_array = check_array([0, 1, 2], ensure_2d=False) @@ -313,7 +317,7 @@ def test_check_array_dtype_warning(): def test_check_array_min_samples_and_features_messages(): # empty list is considered 2D by default: msg = 
"0 feature(s) (shape=(1, 0)) while a minimum of 1 is required." - assert_raise_message(ValueError, msg, check_array, []) + assert_raise_message(ValueError, msg, check_array, [[]]) # If considered a 1D collection when ensure_2d=False, then the minimum # number of samples will break: @@ -326,8 +330,9 @@ def test_check_array_min_samples_and_features_messages(): # But this works if the input data is forced to look like a 2 array with # one sample and one feature: - X_checked = check_array(42, ensure_2d=True) - assert_array_equal(np.array([[42]]), X_checked) + # This might no loner be needed + #X_checked = check_array([42], ensure_2d=True) + #assert_array_equal(np.array([[42]]), X_checked) # Simulate a model that would need at least 2 samples to be well defined X = np.ones((1, 10)) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index ef5e1d742aa49..04b27b996c6c4 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -361,11 +361,20 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, array = _ensure_sparse_format(array, accept_sparse, dtype, copy, force_all_finite) else: + array = np.array(array, dtype=dtype, order=order, copy=copy) + if ensure_2d: + if array.ndim == 1: + warnings.warn("Passing 1d arrays as data is deprecated and " + "will be removed in 0.18. Reshape your data either using" + "X.reshape(-1, 1) if your data has a single feature or" + "X.reshape(1, -1) if it contains a single sample.", + DeprecationWarning) array = np.atleast_2d(array) + # To ensure that array flags are maintained + array = np.array(array, dtype=dtype, order=order, copy=copy) - array = np.array(array, dtype=dtype, order=order, copy=copy) - # make sure we actually converted to numeric: + # make sure we acually converted to numeric: if dtype_numeric and array.dtype.kind == "O": array = array.astype(np.float64) if not allow_nd and array.ndim >= 3: From 81636eee234cde025c12abb49d85c5bd344b9a81 Mon Sep 17 00:00:00 2001 From: Vighnesh Birodkar Date: Fri, 4 Sep 2015 14:38:50 -0400 Subject: [PATCH 2/3] Improved array indexing for readability and other fixes --- doc/modules/model_persistence.rst | 2 +- doc/tutorial/basic/tutorial.rst | 2 +- sklearn/covariance/graph_lasso_.py | 1 + sklearn/covariance/tests/test_covariance.py | 2 +- sklearn/naive_bayes.py | 2 +- sklearn/neighbors/tests/test_neighbors.py | 17 ++--------------- sklearn/neighbors/unsupervised.py | 4 ++-- sklearn/preprocessing/data.py | 4 ++-- sklearn/tests/test_naive_bayes.py | 4 ++-- sklearn/utils/extmath.py | 2 +- sklearn/utils/tests/test_validation.py | 7 +++---- 11 files changed, 17 insertions(+), 30 deletions(-) diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index 403257fca0817..dfa0d4646638e 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -30,7 +30,7 @@ persistence model, namely `pickle `_ >>> import pickle >>> s = pickle.dumps(clf) >>> clf2 = pickle.loads(s) - >>> clf2.predict([X[0]]) + >>> clf2.predict(X[0:1]) array([0]) >>> y[0] 0 diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 20511d8ca7a4c..6213fd340ad7e 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -223,7 +223,7 @@ persistence model, namely `pickle `_ >>> import pickle >>> s = pickle.dumps(clf) >>> clf2 = pickle.loads(s) - >>> clf2.predict([X[0]]) + >>> clf2.predict(X[0:1]) array([0]) >>> y[0] 0 diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py 
index 3df995f1bbe33..f104890946251 100644 --- a/sklearn/covariance/graph_lasso_.py +++ b/sklearn/covariance/graph_lasso_.py @@ -557,6 +557,7 @@ def fit(self, X, y=None): X : ndarray, shape (n_samples, n_features) Data from which to compute the covariance estimate """ + # Covariance does not make sense for a single feature X = check_array(X, ensure_min_features=2) if self.assume_centered: self.location_ = np.zeros(X.shape[1]) diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index 79e6f1ae6ed43..526f48f23f94f 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -220,7 +220,7 @@ def test_oas(): assert_array_almost_equal(scov.covariance_, oa.covariance_, 4) # test with n_features = 1 - X_1d = X[:, 0, np.newaxis] + X_1d = X[:, 0:1] oa = OAS(assume_centered=True) oa.fit(X_1d) oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 057d0916e5c42..ed663945974a1 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -619,7 +619,7 @@ class MultinomialNB(BaseDiscreteNB): >>> clf = MultinomialNB() >>> clf.fit(X, y) MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True) - >>> print(clf.predict([X[2]])) + >>> print(clf.predict(X[2:3])) [3] Notes diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 3a4b787f76a3d..91a86d2d3ba7f 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -909,21 +909,8 @@ def test_neighbors_badargs(): nbrs = neighbors.NearestNeighbors().fit(X) - assert_raises(ValueError, - nbrs.kneighbors_graph, - X, mode='blah') - assert_raises(ValueError, - nbrs.radius_neighbors_graph, - X, mode='blah') - - nbrs = neighbors.NearestNeighbors().fit(X) - - assert_raises(ValueError, - nbrs.kneighbors_graph, - X, mode='blah') - assert_raises(ValueError, - nbrs.radius_neighbors_graph, - X, mode='blah') + assert_raises(ValueError, nbrs.kneighbors_graph, X, mode='blah') + assert_raises(ValueError, nbrs.radius_neighbors_graph, X, mode='blah') def test_neighbors_metrics(n_samples=20, n_features=3, diff --git a/sklearn/neighbors/unsupervised.py b/sklearn/neighbors/unsupervised.py index 5d7de4a12e7cf..590069b9ed55e 100644 --- a/sklearn/neighbors/unsupervised.py +++ b/sklearn/neighbors/unsupervised.py @@ -93,8 +93,8 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin, ... #doctest: +ELLIPSIS array([[2, 0]]...) - >>> rng = neigh.radius_neighbors([[0, 0, 1.3]], 0.4, return_distance=False) - >>> np.asarray(rng[0][0]) + >>> nbrs = neigh.radius_neighbors([[0, 0, 1.3]], 0.4, return_distance=False) + >>> np.asarray(nbrs[0][0]) array(2) See also diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index a78d9831e59e1..2fa78ee69ec3b 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -46,10 +46,10 @@ 'minmax_scale', ] -DEPRECATION_MSG_1D = "Passing 1d arrays as data is deprecated and " +DEPRECATION_MSG_1D = ("Passing 1d arrays as data is deprecated and " "will be removed in 0.18. Reshape your data either using" "X.reshape(-1, 1) if your data has a single feature or" -"X.reshape(1, -1) if it contains a single sample." +"X.reshape(1, -1) if it contains a single sample.") def _mean_and_std(X, axis=0, with_mean=True, with_std=True): """Compute mean and std deviation for centering, scaling. 
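
For context, DEPRECATION_MSG_1D above is the message the preprocessing module now carries for 1-d input, and is presumably what the scaler classes emit before reshaping. A minimal sketch of the caller-side fix the message asks for, assuming a branch with these patches applied (the data and variable names here are illustrative only, not part of the patch):

    import warnings
    import numpy as np
    from sklearn.preprocessing import StandardScaler

    x = np.arange(5.0)                      # five values stored as a 1d array

    # Deprecated usage: on the patched branch this still fits, but emits the
    # deprecation message above before the data is reshaped internally.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        StandardScaler().fit(x)
        print([str(w.message) for w in caught])

    # Preferred usage: state the intended shape explicitly.
    X_single_feature = x.reshape(-1, 1)     # 5 samples, 1 feature
    X_single_sample = x.reshape(1, -1)      # 1 sample, 5 features
    StandardScaler().fit(X_single_feature)  # no deprecation warning
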
diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 3af99fc903bd7..abee98e3a8cf7 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -253,7 +253,7 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict([X[-1]]), 2) + assert_equal(clf.predict(X[-1:]), 2) assert_equal(clf.predict_proba([X[0]]).shape, (1, 2)) assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1), np.array([1., 1.]), 6) @@ -263,7 +263,7 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict_proba([X[0]]).shape, (1, 3)) + assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3)) assert_equal(clf.predict_proba(X[:2]).shape, (2, 3)) assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1) assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index d583841536534..ff0ea1a06582d 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -600,7 +600,7 @@ def log_logistic(X, out=None): """ is_1d = X.ndim == 1 X = np.atleast_2d(X) - X = check_array(X, dtype=np.float) + X = check_array(X, dtype=np.float64) n_samples, n_features = X.shape diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 75b594acee630..43a405a8d1093 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -121,7 +121,6 @@ def test_check_array(): X_csr = sp.csr_matrix(X) assert_raises(TypeError, check_array, X_csr) # ensure_2d - # This might not be needed assert_warns(DeprecationWarning, check_array, [0, 1, 2]) X_array = check_array([0, 1, 2]) assert_equal(X_array.ndim, 2) @@ -330,9 +329,9 @@ def test_check_array_min_samples_and_features_messages(): # But this works if the input data is forced to look like a 2 array with # one sample and one feature: - # This might no loner be needed - #X_checked = check_array([42], ensure_2d=True) - #assert_array_equal(np.array([[42]]), X_checked) + X_checked = assert_warns(DeprecationWarning, check_array, [42], + ensure_2d=True) + assert_array_equal(np.array([[42]]), X_checked) # Simulate a model that would need at least 2 samples to be well defined X = np.ones((1, 10)) From e064d7cd3db4b966ec4bc591aac43f396d777301 Mon Sep 17 00:00:00 2001 From: Vighnesh Birodkar Date: Fri, 4 Sep 2015 15:55:38 -0400 Subject: [PATCH 3/3] changed some array addressing syntax and added comment --- doc/tutorial/basic/tutorial.rst | 2 +- sklearn/covariance/graph_lasso_.py | 3 +++ sklearn/naive_bayes.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 6213fd340ad7e..ce6af679ec461 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -185,7 +185,7 @@ Now you can predict new values, in particular, we can ask to the classifier what is the digit of our last image in the ``digits`` dataset, which we have not used to train the classifier:: - >>> clf.predict([digits.data[-1]]) + >>> clf.predict(digits.data[-1:]) array([8]) The corresponding image is the following: diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py index f104890946251..91b98a8dc6bfc 100644 --- a/sklearn/covariance/graph_lasso_.py +++ b/sklearn/covariance/graph_lasso_.py @@ 
-334,7 +334,10 @@ def __init__(self, alpha=.01, mode='cd', tol=1e-4, enet_tol=1e-4, self.store_precision = True def fit(self, X, y=None): + + # Covariance does not make sense for a single feature X = check_array(X, ensure_min_features=2, ensure_min_samples=2) + if self.assume_centered: self.location_ = np.zeros(X.shape[1]) else: diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index ed663945974a1..eddc16c131ad9 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -717,7 +717,7 @@ class BernoulliNB(BaseDiscreteNB): >>> clf = BernoulliNB() >>> clf.fit(X, Y) BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True) - >>> print(clf.predict([X[2]])) + >>> print(clf.predict(X[2:3])) [3] References
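
Taken together, the series changes check_array so that 1-d input is still accepted in this release but triggers a DeprecationWarning and is promoted to 2-d, while ensure_2d=False keeps the old pass-through behaviour. A small sketch of that contract, assuming a build with these patches applied; it uses the plain warnings module rather than the sklearn.utils.testing helpers used in the tests above:

    import warnings
    import numpy as np
    from sklearn.utils import check_array

    # Default ensure_2d=True: a 1d input warns and comes back as shape (1, 3).
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        X = check_array([0, 1, 2])
    assert X.shape == (1, 3)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Explicit ensure_2d=False: the 1d array passes through unchanged, silently.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        X = check_array(np.array([0, 1, 2]), ensure_2d=False)
    assert X.ndim == 1
    assert not any(issubclass(w.category, DeprecationWarning) for w in caught)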