[RFC] deprecate 1d X in check_array [was reshape sensibly] #4511


Closed
wants to merge 10 commits into from
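Not part of the diff itself, but a minimal sketch (made-up data, with SGDClassifier purely as an illustrative estimator) of the calling convention the changes below enforce: once 1d X is deprecated in check_array, estimator methods expect X of shape (n_samples, n_features), so a single sample has to be passed as a row rather than as a bare 1d array.

```python
import numpy as np
from sklearn.linear_model import SGDClassifier

X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
y = np.array([0, 0, 1, 1])
clf = SGDClassifier(random_state=0).fit(X, y)

sample = X[0]                        # shape (2,): a bare 1d sample
clf.predict(sample.reshape(1, -1))   # explicit single-row 2d array, shape (1, 2)
clf.predict([sample])                # wrapping in a list also yields one row
```

Most of the library and test changes below are instances of exactly this pattern: `reshape(1, -1)`, `np.atleast_2d`, or wrapping a single sample in a list.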
2 changes: 1 addition & 1 deletion sklearn/cluster/k_means_.py
@@ -94,7 +94,7 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None):

# Initialize list of closest distances and calculate current potential
closest_dist_sq = euclidean_distances(
centers[0], X, Y_norm_squared=x_squared_norms, squared=True)
centers[0].reshape(1, -1), X, Y_norm_squared=x_squared_norms, squared=True)
current_pot = closest_dist_sq.sum()

# Pick the remaining n_clusters-1 points
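A hedged sketch (made-up data, not the PR's code) of why the `_k_init` call above reshapes `centers[0]`: `euclidean_distances` treats its first argument as a matrix of samples, so a single center of shape `(n_features,)` has to become a one-row matrix.

```python
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

X = np.random.RandomState(0).rand(5, 3)   # 5 samples, 3 features
center = X[0]                              # a single center, shape (3,)
d = euclidean_distances(center.reshape(1, -1), X, squared=True)
print(d.shape)                             # (1, 5): one distance per sample
```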
16 changes: 0 additions & 16 deletions sklearn/covariance/tests/test_covariance.py
@@ -171,14 +171,6 @@ def test_ledoit_wolf():
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

# test with one sample
# FIXME I don't know what this test does
X_1sample = np.arange(5)
lw = LedoitWolf()
assert_warns(UserWarning, lw.fit, X_1sample)
assert_array_almost_equal(lw.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))

# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
@@ -258,14 +250,6 @@ def test_oas():
assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

# test with one sample
# FIXME I don't know what this test does
X_1sample = np.arange(5)
oa = OAS()
assert_warns(UserWarning, oa.fit, X_1sample)
assert_array_almost_equal(oa.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))

# test shrinkage coeff on a simple data set (without saving precision)
oa = OAS(store_precision=False)
oa.fit(X)
6 changes: 3 additions & 3 deletions sklearn/decomposition/tests/test_dict_learning.py
@@ -51,11 +51,11 @@ def test_dict_learning_nonzero_coefs():
n_components = 4
dico = DictionaryLearning(n_components, transform_algorithm='lars',
transform_n_nonzero_coefs=3, random_state=0)
code = dico.fit(X).transform(X[1])
code = dico.fit(X).transform([X[1]])
assert_true(len(np.flatnonzero(code)) == 3)

dico.set_params(transform_algorithm='omp')
code = dico.transform(X[1])
code = dico.transform([X[1]])
assert_equal(len(np.flatnonzero(code)), 3)


@@ -149,7 +149,7 @@ def test_dict_learning_online_partial_fit():
random_state=0)
for i in range(10):
for sample in X:
dict2.partial_fit(sample)
dict2.partial_fit([sample])

assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) ==
0))
20 changes: 0 additions & 20 deletions sklearn/ensemble/tests/test_forest.py
@@ -725,26 +725,6 @@ def test_memory_layout():
yield check_memory_layout, name, dtype


def check_1d_input(name, X, X_2d, y):
ForestEstimator = FOREST_ESTIMATORS[name]
assert_raises(ValueError, ForestEstimator(random_state=0).fit, X, y)

est = ForestEstimator(random_state=0)
est.fit(X_2d, y)

if name in FOREST_CLASSIFIERS or name in FOREST_REGRESSORS:
assert_raises(ValueError, est.predict, X)


def test_1d_input():
X = iris.data[:, 0].ravel()
X_2d = iris.data[:, 0].reshape((-1, 1))
y = iris.target

for name in FOREST_ESTIMATORS:
yield check_1d_input, name, X, X_2d, y


def check_class_weights(name):
# Check class_weights resemble sample_weights behavior.
ForestClassifier = FOREST_CLASSIFIERS[name]
9 changes: 4 additions & 5 deletions sklearn/ensemble/tests/test_gradient_boosting.py
@@ -492,9 +492,9 @@ def test_degenerate_targets():

clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
clf.fit(X, np.ones(len(X)))
clf.predict(rng.rand(2))
clf.predict(rng.rand(1, 2))
assert_array_equal(np.ones((1,), dtype=np.float64),
clf.predict(rng.rand(2)))
clf.predict(rng.rand(1, 2)))


def test_quantile_loss():
@@ -989,15 +989,14 @@ def test_non_uniform_weights_toy_min_weight_leaf():
X = [[1, 0],
[1, 0],
[1, 0],
[0, 1],
]
[0, 1]]
y = [0, 0, 1, 0]
# ignore the first 2 training samples by setting their weight to 0
sample_weight = [0, 0, 1, 1]
gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1)
gb.fit(X, y, sample_weight=sample_weight)
assert_true(gb.predict([[1, 0]])[0] > 0.5)
assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2)
assert_almost_equal(gb.estimators_[0, 0].splitter.min_weight_leaf, 0.2)


def test_non_uniform_weights_toy_edge_case_clf():
3 changes: 2 additions & 1 deletion sklearn/feature_selection/base.py
@@ -103,7 +103,8 @@ def inverse_transform(self, X):
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
col_nonzeros = self.inverse_transform(
np.diff(X.indptr).reshape(1, -1)).ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
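For context, a small sketch (using an iris-fitted SelectKBest as a stand-in, not the PR's code) of the pattern used in `inverse_transform` above: the 1d vector of per-column nonzero counts is reshaped into a single row because `inverse_transform` expects input of shape `(n_samples, n_selected_features)`.

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2

iris = load_iris()
sel = SelectKBest(chi2, k=2).fit(iris.data, iris.target)

counts = np.array([3, 7])                                   # one value per selected feature
back = sel.inverse_transform(counts.reshape(1, -1)).ravel()
print(back)                                                 # zeros at the dropped features
```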
4 changes: 2 additions & 2 deletions sklearn/feature_selection/tests/test_base.py
@@ -50,7 +50,7 @@ def test_transform_dense():
assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype)

# Check 1d list and other dtype:
names_t_actual = sel.transform(feature_names)
names_t_actual = sel.transform([feature_names])
assert_array_equal(feature_names_t, names_t_actual.ravel())

# Check wrong shape raises error
@@ -85,7 +85,7 @@ def test_inverse_transform_dense():
sel.inverse_transform(Xt.astype(np.float32)).dtype)

# Check 1d list and other dtype:
names_inv_actual = sel.inverse_transform(feature_names_t)
names_inv_actual = sel.inverse_transform([feature_names_t])
assert_array_equal(feature_names_inv, names_inv_actual.ravel())

# Check wrong shape raises error
2 changes: 1 addition & 1 deletion sklearn/feature_selection/tests/test_feature_select.py
@@ -491,7 +491,7 @@ def test_tied_scores():

for n_features in [1, 2, 3]:
sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train)
X_test = sel.transform([0, 1, 2])
X_test = sel.transform([[0, 1, 2]])
assert_array_equal(X_test[0], np.arange(3)[-n_features:])


4 changes: 2 additions & 2 deletions sklearn/feature_selection/univariate_selection.py
@@ -213,8 +213,8 @@ def chi2(X, y):

observed = safe_sparse_dot(Y.T, X) # n_classes * n_features

feature_count = check_array(X.sum(axis=0))
class_prob = check_array(Y.mean(axis=0))
feature_count = X.sum(axis=0).reshape(1, -1)
class_prob = Y.mean(axis=0).reshape(1, -1)
expected = np.dot(class_prob.T, feature_count)

return _chisquare(observed, expected)
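The chi2 change above swaps `check_array` (which would warn or fail on these 1d reductions once 1d X is deprecated) for an explicit `reshape(1, -1)`. A sketch of the shapes involved, on made-up dense data:

```python
import numpy as np

X = np.array([[1., 0., 2.],
              [0., 1., 3.]])                    # 2 samples, 3 features
Y = np.array([[1., 0.],
              [0., 1.]])                        # one-hot class membership, 2 classes

feature_count = X.sum(axis=0).reshape(1, -1)    # (1, 3)
class_prob = Y.mean(axis=0).reshape(1, -1)      # (1, 2)
expected = np.dot(class_prob.T, feature_count)  # (2, 3), same shape as `observed`
print(expected.shape)
```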
30 changes: 15 additions & 15 deletions sklearn/gaussian_process/gaussian_process.py
@@ -769,9 +769,9 @@ def minus_reduced_likelihood_function(log10t):
# Initialize under isotropy assumption
if verbose:
print("Initialize under isotropy assumption...")
self.theta0 = check_array(self.theta0.min())
self.thetaL = check_array(self.thetaL.min())
self.thetaU = check_array(self.thetaU.max())
self.theta0 = np.atleast_2d(self.theta0.min())
self.thetaL = np.atleast_2d(self.thetaL.min())
self.thetaU = np.atleast_2d(self.thetaU.max())
theta_iso, optimal_rlf_value_iso, par_iso = \
self._arg_max_reduced_likelihood_function()
optimal_theta = theta_iso + np.zeros(theta0.shape)
@@ -782,16 +782,16 @@ def minus_reduced_likelihood_function(log10t):
for i in self.random_state.permutation(theta0.size):
if verbose:
print("Proceeding along dimension %d..." % (i + 1))
self.theta0 = check_array(theta_iso)
self.thetaL = check_array(thetaL[0, i])
self.thetaU = check_array(thetaU[0, i])
self.theta0 = np.atleast_2d(theta_iso)
self.thetaL = np.atleast_2d(thetaL[0, i])
self.thetaU = np.atleast_2d(thetaU[0, i])

def corr_cut(t, d):
return corr(check_array(np.hstack([optimal_theta[0][0:i],
t[0],
optimal_theta[0][(i +
1)::]])),
d)
return corr(
np.atleast_2d(
np.hstack([optimal_theta[0][0:i], t[0],
optimal_theta[0][(i + 1)::]])),
d)

self.corr = corr_cut
optimal_theta[0, i], optimal_rlf_value, optimal_par = \
@@ -824,7 +824,7 @@ def _check_params(self, n_samples=None):

# Check regression weights if given (Ordinary Kriging)
if self.beta0 is not None:
self.beta0 = check_array(self.beta0)
self.beta0 = np.atleast_2d(self.beta0)
if self.beta0.shape[1] != 1:
# Force to column vector
self.beta0 = self.beta0.T
@@ -844,12 +844,12 @@ def _check_params(self, n_samples=None):
"'light', %s was given." % self.storage_mode)

# Check correlation parameters
self.theta0 = check_array(self.theta0)
self.theta0 = np.atleast_2d(self.theta0)
lth = self.theta0.size

if self.thetaL is not None and self.thetaU is not None:
self.thetaL = check_array(self.thetaL)
self.thetaU = check_array(self.thetaU)
self.thetaL = np.atleast_2d(self.thetaL)
self.thetaU = np.atleast_2d(self.thetaU)
if self.thetaL.size != lth or self.thetaU.size != lth:
raise ValueError("theta0, thetaL and thetaU must have the "
"same length.")
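The gaussian_process changes use `np.atleast_2d` instead of `check_array` because `theta0`, `thetaL` and `thetaU` may legitimately arrive as scalars or 1d sequences; `atleast_2d` quietly promotes them to the `(1, n)` arrays the surrounding code indexes, with no deprecation machinery involved. A tiny sketch of that behaviour:

```python
import numpy as np

print(np.atleast_2d(0.1).shape)               # (1, 1)  -- isotropic scalar theta
print(np.atleast_2d([0.1, 1.0, 10.0]).shape)  # (1, 3)  -- anisotropic 1d theta
```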
39 changes: 20 additions & 19 deletions sklearn/linear_model/tests/test_sgd.py
@@ -257,6 +257,7 @@ def test_late_onset_averaging_reached(self):
class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest):
"""Test suite for the dense representation variant of SGD"""
factory_class = SGDClassifier

def test_sgd(self):
# Check that SGD gives any results :-)

@@ -390,7 +391,7 @@ def test_sgd_multiclass(self):
clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2)
assert_equal(clf.coef_.shape, (3, 2))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
pred = clf.predict(T2)
assert_array_equal(pred, true_result2)

@@ -432,7 +433,7 @@ def test_sgd_multiclass_njobs(self):
clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2)
assert_equal(clf.coef_.shape, (3, 2))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
pred = clf.predict(T2)
assert_array_equal(pred, true_result2)

@@ -469,14 +470,14 @@ def test_sgd_proba(self):
for loss in ["log", "modified_huber"]:
clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
clf.fit(X, Y)
p = clf.predict_proba([3, 2])
p = clf.predict_proba([[3, 2]])
assert_true(p[0, 1] > 0.5)
p = clf.predict_proba([-1, -1])
p = clf.predict_proba([[-1, -1]])
assert_true(p[0, 1] < 0.5)

p = clf.predict_log_proba([3, 2])
p = clf.predict_log_proba([[3, 2]])
assert_true(p[0, 1] > p[0, 0])
p = clf.predict_log_proba([-1, -1])
p = clf.predict_log_proba([[-1, -1]])
assert_true(p[0, 1] < p[0, 0])

# log loss multiclass probability estimates
@@ -488,25 +489,25 @@ def test_sgd_proba(self):
assert_almost_equal(p[0].sum(), 1)
assert_true(np.all(p[0] >= 0))

p = clf.predict_proba([-1, -1])
d = clf.decision_function([-1, -1])
p = clf.predict_proba([[-1, -1]])
d = clf.decision_function([[-1, -1]])
assert_array_equal(np.argsort(p[0]), np.argsort(d[0]))

l = clf.predict_log_proba([3, 2])
p = clf.predict_proba([3, 2])
l = clf.predict_log_proba([[3, 2]])
p = clf.predict_proba([[3, 2]])
assert_array_almost_equal(np.log(p), l)

l = clf.predict_log_proba([-1, -1])
p = clf.predict_proba([-1, -1])
l = clf.predict_log_proba([[-1, -1]])
p = clf.predict_proba([[-1, -1]])
assert_array_almost_equal(np.log(p), l)

# Modified Huber multiclass probability estimates; requires a separate
# test because the hard zero/one probabilities may destroy the
# ordering present in decision_function output.
clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
clf.fit(X2, Y2)
d = clf.decision_function([3, 2])
p = clf.predict_proba([3, 2])
d = clf.decision_function([[3, 2]])
p = clf.predict_proba([[3, 2]])
if not isinstance(self, SparseSGDClassifierTestCase):
assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1))
else: # XXX the sparse test gets a different X2 (?)
@@ -516,9 +517,9 @@ def test_sgd_proba(self):
# which would cause naive normalization to fail (see comment
# in SGDClassifier.predict_proba)
x = X.mean(axis=0)
d = clf.decision_function(x)
d = clf.decision_function([x])
if np.all(d < -1): # XXX not true in sparse test case (why?)
p = clf.predict_proba(x)
p = clf.predict_proba([x])
assert_array_almost_equal(p[0], [1 / 3.] * 3)

def test_sgd_l1(self):
@@ -703,7 +704,7 @@ def test_partial_fit_binary(self):
clf.partial_fit(X[:third], Y[:third], classes=classes)
assert_equal(clf.coef_.shape, (1, X.shape[1]))
assert_equal(clf.intercept_.shape, (1,))
assert_equal(clf.decision_function([0, 0]).shape, (1, ))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, ))
id1 = id(clf.coef_.data)

clf.partial_fit(X[third:], Y[third:])
@@ -722,7 +723,7 @@ def test_partial_fit_multiclass(self):
clf.partial_fit(X2[:third], Y2[:third], classes=classes)
assert_equal(clf.coef_.shape, (3, X2.shape[1]))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
id1 = id(clf.coef_.data)

clf.partial_fit(X2[third:], Y2[third:])
@@ -1017,7 +1018,7 @@ def test_partial_fit(self):
clf.partial_fit(X[:third], Y[:third])
assert_equal(clf.coef_.shape, (X.shape[1], ))
assert_equal(clf.intercept_.shape, (1,))
assert_equal(clf.decision_function([0, 0]).shape, (1, ))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, ))
id1 = id(clf.coef_.data)

clf.partial_fit(X[third:], Y[third:])
7 changes: 3 additions & 4 deletions sklearn/metrics/pairwise.py
@@ -192,7 +192,7 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
X, Y = check_pairwise_arrays(X, Y)

if Y_norm_squared is not None:
YY = check_array(Y_norm_squared)
YY = check_array(np.atleast_2d(Y_norm_squared))
if YY.shape != (1, Y.shape[0]):
raise ValueError(
"Incompatible dimensions for Y and Y_norm_squared")
@@ -602,8 +602,7 @@ def paired_cosine_distances(X, Y):
'l2': paired_euclidean_distances,
'l1': paired_manhattan_distances,
'manhattan': paired_manhattan_distances,
'cityblock': paired_manhattan_distances,
}
'cityblock': paired_manhattan_distances}


def paired_distances(X, Y, metric="euclidean", **kwds):
@@ -1089,7 +1088,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

"""
if (metric not in _VALID_METRICS and
not callable(metric) and metric != "precomputed"):
not callable(metric) and metric != "precomputed"):
raise ValueError("Unknown metric %s. "
"Valid metrics are %s, or 'precomputed', or a "
"callable" % (metric, _VALID_METRICS))
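A small sketch (made-up data) of the `Y_norm_squared` path touched above: callers often pass the precomputed squared norms as a flat vector of length `n_samples_Y`, and `np.atleast_2d` promotes it to the `(1, n_samples_Y)` row the shape check expects.

```python
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.RandomState(0)
X = rng.rand(2, 3)
Y = rng.rand(4, 3)

y_norms = (Y ** 2).sum(axis=1)   # 1d, shape (4,)
d = euclidean_distances(X, Y, Y_norm_squared=y_norms, squared=True)
print(d.shape)                    # (2, 4)
```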
4 changes: 2 additions & 2 deletions sklearn/naive_bayes.py
@@ -475,7 +475,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
# convert to float to support sample weight consistently
Y = Y.astype(np.float64)
if sample_weight is not None:
Y *= check_array(sample_weight).T
Y *= check_array(sample_weight)

class_prior = self.class_prior

@@ -524,7 +524,7 @@ def fit(self, X, y, sample_weight=None):
# this means we also don't have to cast X to floating point
Y = Y.astype(np.float64)
if sample_weight is not None:
Y *= check_array(sample_weight).T
Y *= check_array(sample_weight)

class_prior = self.class_prior
