diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index 9cae6338f1a76..cc094c4689940 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -94,7 +94,7 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None): # Initialize list of closest distances and calculate current potential closest_dist_sq = euclidean_distances( - centers[0], X, Y_norm_squared=x_squared_norms, squared=True) + centers[0].reshape(1, -1), X, Y_norm_squared=x_squared_norms, squared=True) current_pot = closest_dist_sq.sum() # Pick the remaining n_clusters-1 points diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index f34029b400144..37fbc15804e2a 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -171,14 +171,6 @@ def test_ledoit_wolf(): assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4) - # test with one sample - # FIXME I don't know what this test does - X_1sample = np.arange(5) - lw = LedoitWolf() - assert_warns(UserWarning, lw.fit, X_1sample) - assert_array_almost_equal(lw.covariance_, - np.zeros(shape=(5, 5), dtype=np.float64)) - # test shrinkage coeff on a simple data set (without saving precision) lw = LedoitWolf(store_precision=False) lw.fit(X) @@ -258,14 +250,6 @@ def test_oas(): assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4) - # test with one sample - # FIXME I don't know what this test does - X_1sample = np.arange(5) - oa = OAS() - assert_warns(UserWarning, oa.fit, X_1sample) - assert_array_almost_equal(oa.covariance_, - np.zeros(shape=(5, 5), dtype=np.float64)) - # test shrinkage coeff on a simple data set (without saving precision) oa = OAS(store_precision=False) oa.fit(X) diff --git a/sklearn/decomposition/tests/test_dict_learning.py 
b/sklearn/decomposition/tests/test_dict_learning.py index f0bb6bb281b27..cfb006be9ec6e 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -51,11 +51,11 @@ def test_dict_learning_nonzero_coefs(): n_components = 4 dico = DictionaryLearning(n_components, transform_algorithm='lars', transform_n_nonzero_coefs=3, random_state=0) - code = dico.fit(X).transform(X[1]) + code = dico.fit(X).transform([X[1]]) assert_true(len(np.flatnonzero(code)) == 3) dico.set_params(transform_algorithm='omp') - code = dico.transform(X[1]) + code = dico.transform([X[1]]) assert_equal(len(np.flatnonzero(code)), 3) @@ -149,7 +149,7 @@ def test_dict_learning_online_partial_fit(): random_state=0) for i in range(10): for sample in X: - dict2.partial_fit(sample) + dict2.partial_fit([sample]) assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 33aa5cb3e4050..7a4d89498abe6 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -725,26 +725,6 @@ def test_memory_layout(): yield check_memory_layout, name, dtype -def check_1d_input(name, X, X_2d, y): - ForestEstimator = FOREST_ESTIMATORS[name] - assert_raises(ValueError, ForestEstimator(random_state=0).fit, X, y) - - est = ForestEstimator(random_state=0) - est.fit(X_2d, y) - - if name in FOREST_CLASSIFIERS or name in FOREST_REGRESSORS: - assert_raises(ValueError, est.predict, X) - - -def test_1d_input(): - X = iris.data[:, 0].ravel() - X_2d = iris.data[:, 0].reshape((-1, 1)) - y = iris.target - - for name in FOREST_ESTIMATORS: - yield check_1d_input, name, X, X_2d, y - - def check_class_weights(name): # Check class_weights resemble sample_weights behavior. 
ForestClassifier = FOREST_CLASSIFIERS[name] diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 3f7f7f23f6566..2bb4246dc7f07 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -492,9 +492,9 @@ def test_degenerate_targets(): clf = GradientBoostingRegressor(n_estimators=100, random_state=1) clf.fit(X, np.ones(len(X))) - clf.predict(rng.rand(2)) + clf.predict(rng.rand(1, 2)) assert_array_equal(np.ones((1,), dtype=np.float64), - clf.predict(rng.rand(2))) + clf.predict(rng.rand(1, 2))) def test_quantile_loss(): @@ -989,15 +989,14 @@ def test_non_uniform_weights_toy_min_weight_leaf(): X = [[1, 0], [1, 0], [1, 0], - [0, 1], - ] + [0, 1]] y = [0, 0, 1, 0] # ignore the first 2 training samples by setting their weight to 0 sample_weight = [0, 0, 1, 1] gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1) gb.fit(X, y, sample_weight=sample_weight) assert_true(gb.predict([[1, 0]])[0] > 0.5) - assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2) + assert_almost_equal(gb.estimators_[0, 0].splitter.min_weight_leaf, 0.2) def test_non_uniform_weights_toy_edge_case_clf(): diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 16f2d274ace62..6385584404631 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -103,7 +103,8 @@ def inverse_transform(self, X): # insert additional entries in indptr: # e.g. 
if transform changed indptr from [0 2 6 7] to [0 2 3] # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3] - col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel() + col_nonzeros = self.inverse_transform( + np.diff(X.indptr).reshape(1, -1)).ravel() indptr = np.concatenate([[0], np.cumsum(col_nonzeros)]) Xt = csc_matrix((X.data, X.indices, indptr), shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype) diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index fe17fa0f75326..8374c61ca8ff6 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -50,7 +50,7 @@ def test_transform_dense(): assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype) # Check 1d list and other dtype: - names_t_actual = sel.transform(feature_names) + names_t_actual = sel.transform([feature_names]) assert_array_equal(feature_names_t, names_t_actual.ravel()) # Check wrong shape raises error @@ -85,7 +85,7 @@ def test_inverse_transform_dense(): sel.inverse_transform(Xt.astype(np.float32)).dtype) # Check 1d list and other dtype: - names_inv_actual = sel.inverse_transform(feature_names_t) + names_inv_actual = sel.inverse_transform([feature_names_t]) assert_array_equal(feature_names_inv, names_inv_actual.ravel()) # Check wrong shape raises error diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index adc289888fa1f..204d7c2e25dba 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -491,7 +491,7 @@ def test_tied_scores(): for n_features in [1, 2, 3]: sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train) - X_test = sel.transform([0, 1, 2]) + X_test = sel.transform([[0, 1, 2]]) assert_array_equal(X_test[0], np.arange(3)[-n_features:]) diff --git a/sklearn/feature_selection/univariate_selection.py 
b/sklearn/feature_selection/univariate_selection.py index 69a2198b695e3..39925a428df12 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -213,8 +213,8 @@ def chi2(X, y): observed = safe_sparse_dot(Y.T, X) # n_classes * n_features - feature_count = check_array(X.sum(axis=0)) - class_prob = check_array(Y.mean(axis=0)) + feature_count = X.sum(axis=0).reshape(1, -1) + class_prob = Y.mean(axis=0).reshape(1, -1) expected = np.dot(class_prob.T, feature_count) return _chisquare(observed, expected) diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py index d1507f98414f2..e2e4baa1379cf 100644 --- a/sklearn/gaussian_process/gaussian_process.py +++ b/sklearn/gaussian_process/gaussian_process.py @@ -769,9 +769,9 @@ def minus_reduced_likelihood_function(log10t): # Initialize under isotropy assumption if verbose: print("Initialize under isotropy assumption...") - self.theta0 = check_array(self.theta0.min()) - self.thetaL = check_array(self.thetaL.min()) - self.thetaU = check_array(self.thetaU.max()) + self.theta0 = np.atleast_2d(self.theta0.min()) + self.thetaL = np.atleast_2d(self.thetaL.min()) + self.thetaU = np.atleast_2d(self.thetaU.max()) theta_iso, optimal_rlf_value_iso, par_iso = \ self._arg_max_reduced_likelihood_function() optimal_theta = theta_iso + np.zeros(theta0.shape) @@ -782,16 +782,16 @@ def minus_reduced_likelihood_function(log10t): for i in self.random_state.permutation(theta0.size): if verbose: print("Proceeding along dimension %d..." 
% (i + 1)) - self.theta0 = check_array(theta_iso) - self.thetaL = check_array(thetaL[0, i]) - self.thetaU = check_array(thetaU[0, i]) + self.theta0 = np.atleast_2d(theta_iso) + self.thetaL = np.atleast_2d(thetaL[0, i]) + self.thetaU = np.atleast_2d(thetaU[0, i]) def corr_cut(t, d): - return corr(check_array(np.hstack([optimal_theta[0][0:i], - t[0], - optimal_theta[0][(i + - 1)::]])), - d) + return corr( + np.atleast_2d( + np.hstack([optimal_theta[0][0:i], t[0], + optimal_theta[0][(i + 1)::]])), + d) self.corr = corr_cut optimal_theta[0, i], optimal_rlf_value, optimal_par = \ @@ -824,7 +824,7 @@ def _check_params(self, n_samples=None): # Check regression weights if given (Ordinary Kriging) if self.beta0 is not None: - self.beta0 = check_array(self.beta0) + self.beta0 = np.atleast_2d(self.beta0) if self.beta0.shape[1] != 1: # Force to column vector self.beta0 = self.beta0.T @@ -844,12 +844,12 @@ def _check_params(self, n_samples=None): "'light', %s was given." % self.storage_mode) # Check correlation parameters - self.theta0 = check_array(self.theta0) + self.theta0 = np.atleast_2d(self.theta0) lth = self.theta0.size if self.thetaL is not None and self.thetaU is not None: - self.thetaL = check_array(self.thetaL) - self.thetaU = check_array(self.thetaU) + self.thetaL = np.atleast_2d(self.thetaL) + self.thetaU = np.atleast_2d(self.thetaU) if self.thetaL.size != lth or self.thetaU.size != lth: raise ValueError("theta0, thetaL and thetaU must have the " "same length.") diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 0e7d9b4c9952c..586f6e2bafdbe 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -257,6 +257,7 @@ def test_late_onset_averaging_reached(self): class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): """Test suite for the dense representation variant of SGD""" factory_class = SGDClassifier + def test_sgd(self): # Check that SGD gives any results :-) @@ 
-390,7 +391,7 @@ def test_sgd_multiclass(self): clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, 3)) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -432,7 +433,7 @@ def test_sgd_multiclass_njobs(self): clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, 3)) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -469,14 +470,14 @@ def test_sgd_proba(self): for loss in ["log", "modified_huber"]: clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10) clf.fit(X, Y) - p = clf.predict_proba([3, 2]) + p = clf.predict_proba([[3, 2]]) assert_true(p[0, 1] > 0.5) - p = clf.predict_proba([-1, -1]) + p = clf.predict_proba([[-1, -1]]) assert_true(p[0, 1] < 0.5) - p = clf.predict_log_proba([3, 2]) + p = clf.predict_log_proba([[3, 2]]) assert_true(p[0, 1] > p[0, 0]) - p = clf.predict_log_proba([-1, -1]) + p = clf.predict_log_proba([[-1, -1]]) assert_true(p[0, 1] < p[0, 0]) # log loss multiclass probability estimates @@ -488,16 +489,16 @@ def test_sgd_proba(self): assert_almost_equal(p[0].sum(), 1) assert_true(np.all(p[0] >= 0)) - p = clf.predict_proba([-1, -1]) - d = clf.decision_function([-1, -1]) + p = clf.predict_proba([[-1, -1]]) + d = clf.decision_function([[-1, -1]]) assert_array_equal(np.argsort(p[0]), np.argsort(d[0])) - l = clf.predict_log_proba([3, 2]) - p = clf.predict_proba([3, 2]) + l = clf.predict_log_proba([[3, 2]]) + p = clf.predict_proba([[3, 2]]) assert_array_almost_equal(np.log(p), l) - l = clf.predict_log_proba([-1, -1]) - p = clf.predict_proba([-1, -1]) + l = clf.predict_log_proba([[-1, -1]]) + p = 
clf.predict_proba([[-1, -1]]) assert_array_almost_equal(np.log(p), l) # Modified Huber multiclass probability estimates; requires a separate @@ -505,8 +506,8 @@ def test_sgd_proba(self): # ordering present in decision_function output. clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10) clf.fit(X2, Y2) - d = clf.decision_function([3, 2]) - p = clf.predict_proba([3, 2]) + d = clf.decision_function([[3, 2]]) + p = clf.predict_proba([[3, 2]]) if not isinstance(self, SparseSGDClassifierTestCase): assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1)) else: # XXX the sparse test gets a different X2 (?) @@ -516,9 +517,9 @@ def test_sgd_proba(self): # which would cause naive normalization to fail (see comment # in SGDClassifier.predict_proba) x = X.mean(axis=0) - d = clf.decision_function(x) + d = clf.decision_function([x]) if np.all(d < -1): # XXX not true in sparse test case (why?) - p = clf.predict_proba(x) + p = clf.predict_proba([x]) assert_array_almost_equal(p[0], [1 / 3.] * 3) def test_sgd_l1(self): @@ -703,7 +704,7 @@ def test_partial_fit_binary(self): clf.partial_fit(X[:third], Y[:third], classes=classes) assert_equal(clf.coef_.shape, (1, X.shape[1])) assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, )) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, )) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) @@ -722,7 +723,7 @@ def test_partial_fit_multiclass(self): clf.partial_fit(X2[:third], Y2[:third], classes=classes) assert_equal(clf.coef_.shape, (3, X2.shape[1])) assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([0, 0]).shape, (1, 3)) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) id1 = id(clf.coef_.data) clf.partial_fit(X2[third:], Y2[third:]) @@ -1017,7 +1018,7 @@ def test_partial_fit(self): clf.partial_fit(X[:third], Y[:third]) assert_equal(clf.coef_.shape, (X.shape[1], )) assert_equal(clf.intercept_.shape, (1,)) - 
assert_equal(clf.decision_function([0, 0]).shape, (1, )) + assert_equal(clf.decision_function([[0, 0]]).shape, (1, )) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index ac36498339250..d7ec10857a199 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -192,7 +192,7 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False): X, Y = check_pairwise_arrays(X, Y) if Y_norm_squared is not None: - YY = check_array(Y_norm_squared) + YY = check_array(np.atleast_2d(Y_norm_squared)) if YY.shape != (1, Y.shape[0]): raise ValueError( "Incompatible dimensions for Y and Y_norm_squared") @@ -602,8 +602,7 @@ def paired_cosine_distances(X, Y): 'l2': paired_euclidean_distances, 'l1': paired_manhattan_distances, 'manhattan': paired_manhattan_distances, - 'cityblock': paired_manhattan_distances, - } + 'cityblock': paired_manhattan_distances} def paired_distances(X, Y, metric="euclidean", **kwds): @@ -1089,7 +1088,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds): """ if (metric not in _VALID_METRICS and - not callable(metric) and metric != "precomputed"): + not callable(metric) and metric != "precomputed"): raise ValueError("Unknown metric %s. 
" "Valid metrics are %s, or 'precomputed', or a " "callable" % (metric, _VALID_METRICS)) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 1ecf4ad5bcc5f..7b7f907260217 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -475,7 +475,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): # convert to float to support sample weight consistently Y = Y.astype(np.float64) if sample_weight is not None: - Y *= check_array(sample_weight).T + Y *= check_array(sample_weight) class_prior = self.class_prior @@ -524,7 +524,7 @@ def fit(self, X, y, sample_weight=None): # this means we also don't have to cast X to floating point Y = Y.astype(np.float64) if sample_weight is not None: - Y *= check_array(sample_weight).T + Y *= check_array(sample_weight) class_prior = self.class_prior diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index 57182cc5e5552..4e16e440f5016 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -428,9 +428,8 @@ def kneighbors(self, X, n_neighbors=None, return_distance=True): neighbors, distances = [], [] bin_queries, max_depth = self._query(X) for i in range(X.shape[0]): - neighs, dists = self._get_candidates(X[i], max_depth[i], - bin_queries[i], - n_neighbors) + neighs, dists = self._get_candidates(X[i].reshape((1, -1)), max_depth[i], + bin_queries[i], n_neighbors) neighbors.append(neighs) distances.append(dists) @@ -487,7 +486,8 @@ def radius_neighbors(self, X, radius=None, return_distance=True): neighbors, distances = [], [] bin_queries, max_depth = self._query(X) for i in range(X.shape[0]): - neighs, dists = self._get_radius_neighbors(X[i], max_depth[i], + neighs, dists = self._get_radius_neighbors(X[i].reshape(1, -1), + max_depth[i], bin_queries[i], radius) neighbors.append(neighs) distances.append(dists) diff --git a/sklearn/neighbors/tests/test_approximate.py b/sklearn/neighbors/tests/test_approximate.py index 50794e263c831..cfb400d58ea58
100644 --- a/sklearn/neighbors/tests/test_approximate.py +++ b/sklearn/neighbors/tests/test_approximate.py @@ -41,7 +41,7 @@ def test_neighbors_accuracy_with_n_candidates(): lshf = LSHForest(n_candidates=n_candidates) lshf.fit(X) for j in range(n_iter): - query = X[rng.randint(0, n_samples)] + query = X[rng.randint(0, n_samples)].reshape(1, -1) neighbors = lshf.kneighbors(query, n_neighbors=n_points, return_distance=False) distances = pairwise_distances(query, X, metric='cosine') diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index 7a26af1833665..29c62dd729fd6 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -102,7 +102,7 @@ def test_ball_tree_query_radius(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind = bt.query_radius(query_pt, r + eps)[0] + ind = bt.query_radius([query_pt], r + eps)[0] i = np.where(rad <= r + eps)[0] ind.sort() @@ -121,7 +121,7 @@ def test_ball_tree_query_radius_distance(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind, dist = bt.query_radius(query_pt, r + eps, return_distance=True) + ind, dist = bt.query_radius([query_pt], r + eps, return_distance=True) ind = ind[0] dist = dist[0] diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index 8bccc46087996..704a715df6a0b 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -58,7 +58,7 @@ def test_kd_tree_query_radius(n_samples=100, n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind = kdt.query_radius(query_pt, r + eps)[0] + ind = kdt.query_radius([query_pt], r + eps)[0] i = np.where(rad <= r + eps)[0] ind.sort() @@ -77,7 +77,7 @@ def test_kd_tree_query_radius_distance(n_samples=100, 
n_features=10): rad = np.sqrt(((X - query_pt) ** 2).sum(1)) for r in np.linspace(rad[0], rad[-1], 100): - ind, dist = kdt.query_radius(query_pt, r + eps, return_distance=True) + ind, dist = kdt.query_radius([query_pt], r + eps, return_distance=True) ind = ind[0] dist = dist[0] diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py index a735954fcb5be..fda5a9e6dd614 100644 --- a/sklearn/neural_network/tests/test_rbm.py +++ b/sklearn/neural_network/tests/test_rbm.py @@ -161,7 +161,7 @@ def test_score_samples(): # Test numerical stability (#2785): would previously generate infinities # and crash with an exception. with np.errstate(under='ignore'): - rbm1.score_samples(np.arange(1000) * 100) + rbm1.score_samples(np.arange(1000).reshape(1, -1) * 100) def test_rbm_verbose(): diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 4bfe0e6d7d130..e08c418057bab 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -63,6 +63,7 @@ check_non_transformer_estimators_n_iter, check_regressors_no_decision_function, check_pipeline_consistency, + check_X_one_dim, CROSS_DECOMPOSITION) @@ -99,6 +100,7 @@ def test_non_meta_estimators(): yield check_estimators_dtypes, name, Estimator yield check_fit_score_takes_y, name, Estimator yield check_dtype_object, name, Estimator + yield check_X_one_dim, name, Estimator # Check that all estimator yield informative messages when # trained on empty datasets diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 0e180b461b01a..de04b56f55b2d 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -253,8 +253,8 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict(X[-1]), 2) - assert_equal(clf.predict_proba(X[0]).shape, (1, 2)) + assert_equal(clf.predict(X[-1:]), 2) + 
assert_equal(clf.predict_proba(X[:1]).shape, (1, 2)) assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1), np.array([1., 1.]), 6) @@ -263,10 +263,10 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict_proba(X[0]).shape, (1, 3)) + assert_equal(clf.predict_proba(X[:1]).shape, (1, 3)) assert_equal(clf.predict_proba(X[:2]).shape, (2, 3)) - assert_almost_equal(np.sum(clf.predict_proba(X[1])), 1) - assert_almost_equal(np.sum(clf.predict_proba(X[-1])), 1) + assert_almost_equal(np.sum(clf.predict_proba(X[1:2])), 1) + assert_almost_equal(np.sum(clf.predict_proba(X[-1:])), 1) assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1) assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1) @@ -351,7 +351,7 @@ def test_sample_weight_mnb(): clf.fit([[1, 2], [1, 2], [1, 0]], [0, 0, 1], sample_weight=[1, 1, 4]) - assert_array_equal(clf.predict([1, 0]), [1]) + assert_array_equal(clf.predict([[1, 0]]), [1]) positive_prior = np.exp(clf.intercept_[0]) assert_array_almost_equal([1 - positive_prior, positive_prior], [1 / 3., 2 / 3.]) @@ -459,7 +459,7 @@ def test_bnb(): # Testing data point is: # Chinese Chinese Chinese Tokyo Japan - X_test = np.array([0, 1, 1, 0, 0, 1]) + X_test = np.array([[0, 1, 1, 0, 0, 1]]) # Check the predictive probabilities are correct unnorm_predict_proba = np.array([[0.005183999999999999, diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 0891db43010cd..292ba4fdf9ec3 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -12,7 +12,6 @@ from scipy.sparse import coo_matrix from sklearn.random_projection import sparse_random_matrix -from sklearn.utils.random import sample_without_replacement from sklearn.metrics import accuracy_score from sklearn.metrics import mean_squared_error @@ -1220,25 +1219,6 @@ def test_explicit_sparse_zeros(): yield (check_explicit_sparse_zeros, 
tree) -def check_raise_error_on_1d_input(name): - TreeEstimator = ALL_TREES[name] - - X = iris.data[:, 0].ravel() - X_2d = iris.data[:, 0].reshape((-1, 1)) - y = iris.target - - assert_raises(ValueError, TreeEstimator(random_state=0).fit, X, y) - - est = TreeEstimator(random_state=0) - est.fit(X_2d, y) - assert_raises(ValueError, est.predict, X) - - -def test_1d_input(): - for name in ALL_TREES: - yield check_raise_error_on_1d_input, name - - def _check_min_weight_leaf_split_level(TreeEstimator, X, y, sample_weight): # Private function to keep pretty printing in nose yielded tests est = TreeEstimator(random_state=0) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 3e46d7491d91f..b922a9e6e93c8 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -329,6 +329,32 @@ def check_fit_score_takes_y(name, Estimator): assert_true(args[2] in ["y", "Y"]) +@ignore_warnings +def check_X_one_dim(name, Estimator): + # check that a 1-d X gives the same results as X.reshape(-1, 1) + rnd = np.random.RandomState(0) + X = 3 * rnd.uniform(size=(20, )) + y = X.astype(int) + y = multioutput_estimator_convert_y_2d(name, y) + estimator = Estimator() + set_fast_parameters(estimator) + if hasattr(estimator, "n_components"): + estimator.n_components = 1 + if hasattr(estimator, "n_clusters"): + estimator.n_clusters = 1 + set_random_state(estimator, 1) + estimator_reshaped = clone(estimator) + estimator.fit(X, y) + estimator_reshaped.fit(X.reshape(-1, 1), y) + for method in ["predict", "transform", "decision_function", + "predict_proba"]: + if hasattr(estimator, method): + result = getattr(estimator, method)(X) + result_reshaped = getattr(estimator_reshaped, method)(X.reshape(-1, 1)) + # we actually used it as the n_samples dimension + assert_array_almost_equal(result, result_reshaped) + + @ignore_warnings def check_estimators_dtypes(name, Estimator): rnd = np.random.RandomState(0) @@ -339,8 +365,7 @@ def check_estimators_dtypes(name,
Estimator): y = X_train_int_64[:, 0] y = multioutput_estimator_convert_y_2d(name, y) for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]: - with warnings.catch_warnings(record=True): - estimator = Estimator() + estimator = Estimator() set_fast_parameters(estimator) set_random_state(estimator, 1) estimator.fit(X_train, y) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 8582572ce90e8..389ea09245230 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -330,8 +330,6 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, copy=Fal array = _ensure_sparse_format(array, accept_sparse, dtype, order, copy, force_all_finite) else: - if ensure_2d: - array = np.atleast_2d(array) if dtype == "numeric": if hasattr(array, "dtype") and array.dtype.kind == "O": # if input is object, convert to float. @@ -344,6 +342,11 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, copy=Fal array.ndim) if force_all_finite: _assert_all_finite(array) + if ensure_2d: + if array.ndim == 1: + array = array.reshape(-1, 1) + elif array.ndim == 0: + array = array.reshape(1, 1) shape_repr = _shape_repr(array.shape) if ensure_min_samples > 0: @@ -353,7 +356,6 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, copy=Fal " minimum of %d is required." % (n_samples, shape_repr, ensure_min_samples)) - if ensure_min_features > 0 and array.ndim == 2: n_features = array.shape[1] if n_features < ensure_min_features: