Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Closed
2 changes: 1 addition & 1 deletion sklearn/cluster/k_means_.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None):

# Initialize list of closest distances and calculate current potential
closest_dist_sq = euclidean_distances(
centers[0], X, Y_norm_squared=x_squared_norms, squared=True)
centers[0].reshape(1, -1), X, Y_norm_squared=x_squared_norms, squared=True)
current_pot = closest_dist_sq.sum()

# Pick the remaining n_clusters-1 points
Expand Down
16 changes: 0 additions & 16 deletions sklearn/covariance/tests/test_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,6 @@ def test_ledoit_wolf():
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

# test with one sample
# FIXME I don't know what this test does
X_1sample = np.arange(5)
lw = LedoitWolf()
assert_warns(UserWarning, lw.fit, X_1sample)
assert_array_almost_equal(lw.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))

# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
Expand Down Expand Up @@ -258,14 +250,6 @@ def test_oas():
assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

# test with one sample
# FIXME I don't know what this test does
X_1sample = np.arange(5)
oa = OAS()
assert_warns(UserWarning, oa.fit, X_1sample)
assert_array_almost_equal(oa.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))

# test shrinkage coeff on a simple data set (without saving precision)
oa = OAS(store_precision=False)
oa.fit(X)
Expand Down
6 changes: 3 additions & 3 deletions sklearn/decomposition/tests/test_dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ def test_dict_learning_nonzero_coefs():
n_components = 4
dico = DictionaryLearning(n_components, transform_algorithm='lars',
transform_n_nonzero_coefs=3, random_state=0)
code = dico.fit(X).transform(X[1])
code = dico.fit(X).transform([X[1]])
assert_true(len(np.flatnonzero(code)) == 3)

dico.set_params(transform_algorithm='omp')
code = dico.transform(X[1])
code = dico.transform([X[1]])
assert_equal(len(np.flatnonzero(code)), 3)


Expand Down Expand Up @@ -149,7 +149,7 @@ def test_dict_learning_online_partial_fit():
random_state=0)
for i in range(10):
for sample in X:
dict2.partial_fit(sample)
dict2.partial_fit([sample])

assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) ==
0))
Expand Down
20 changes: 0 additions & 20 deletions sklearn/ensemble/tests/test_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,26 +725,6 @@ def test_memory_layout():
yield check_memory_layout, name, dtype


def check_1d_input(name, X, X_2d, y):
    """Check that the forest estimator ``name`` rejects the input ``X``.

    Fitting a fresh estimator on ``X`` must raise ``ValueError``. For
    classifiers and regressors, predicting on ``X`` after a successful
    fit on ``X_2d`` must raise ``ValueError`` as well.
    """
    estimator_cls = FOREST_ESTIMATORS[name]

    # A fresh estimator must refuse to fit on X.
    unfitted = estimator_cls(random_state=0)
    assert_raises(ValueError, unfitted.fit, X, y)

    # Fitting on the 2-D variant is expected to succeed.
    fitted = estimator_cls(random_state=0)
    fitted.fit(X_2d, y)

    # Only classifiers and regressors are checked at predict time.
    checks_predict = name in FOREST_CLASSIFIERS or name in FOREST_REGRESSORS
    if not checks_predict:
        return
    assert_raises(ValueError, fitted.predict, X)

def test_1d_input():
    """Yield one ``check_1d_input`` case per entry of ``FOREST_ESTIMATORS``.

    The first iris feature is passed both flattened (``X``) and reshaped
    to a single column (``X_2d``), together with the iris targets.
    """
    first_feature = iris.data[:, 0]
    X = first_feature.ravel()
    X_2d = first_feature.reshape((-1, 1))
    y = iris.target

    # Must stay a generator function: each yielded tuple is one test case.
    for name in FOREST_ESTIMATORS:
        yield check_1d_input, name, X, X_2d, y


def check_class_weights(name):
# Check class_weights resemble sample_weights behavior.
ForestClassifier = FOREST_CLASSIFIERS[name]
Expand Down
9 changes: 4 additions & 5 deletions sklearn/ensemble/tests/test_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,9 @@ def test_degenerate_targets():

clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
clf.fit(X, np.ones(len(X)))
clf.predict(rng.rand(2))
clf.predict(rng.rand(1, 2))
assert_array_equal(np.ones((1,), dtype=np.float64),
clf.predict(rng.rand(2)))
clf.predict(rng.rand(1, 2)))


def test_quantile_loss():
Expand Down Expand Up @@ -989,15 +989,14 @@ def test_non_uniform_weights_toy_min_weight_leaf():
X = [[1, 0],
[1, 0],
[1, 0],
[0, 1],
]
[0, 1]]
y = [0, 0, 1, 0]
# ignore the first 2 training samples by setting their weight to 0
sample_weight = [0, 0, 1, 1]
gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1)
gb.fit(X, y, sample_weight=sample_weight)
assert_true(gb.predict([[1, 0]])[0] > 0.5)
assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2)
assert_almost_equal(gb.estimators_[0, 0].splitter.min_weight_leaf, 0.2)


def test_non_uniform_weights_toy_edge_case_clf():
Expand Down
3 changes: 2 additions & 1 deletion sklearn/feature_selection/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ def inverse_transform(self, X):
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
col_nonzeros = self.inverse_transform(
np.diff(X.indptr).reshape(1, -1)).ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
Expand Down
4 changes: 2 additions & 2 deletions sklearn/feature_selection/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_transform_dense():
assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype)

# Check 1d list and other dtype:
names_t_actual = sel.transform(feature_names)
names_t_actual = sel.transform([feature_names])
assert_array_equal(feature_names_t, names_t_actual.ravel())

# Check wrong shape raises error
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_inverse_transform_dense():
sel.inverse_transform(Xt.astype(np.float32)).dtype)

# Check 1d list and other dtype:
names_inv_actual = sel.inverse_transform(feature_names_t)
names_inv_actual = sel.inverse_transform([feature_names_t])
assert_array_equal(feature_names_inv, names_inv_actual.ravel())

# Check wrong shape raises error
Expand Down
2 changes: 1 addition & 1 deletion sklearn/feature_selection/tests/test_feature_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def test_tied_scores():

for n_features in [1, 2, 3]:
sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train)
X_test = sel.transform([0, 1, 2])
X_test = sel.transform([[0, 1, 2]])
assert_array_equal(X_test[0], np.arange(3)[-n_features:])


Expand Down
4 changes: 2 additions & 2 deletions sklearn/feature_selection/univariate_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ def chi2(X, y):

observed = safe_sparse_dot(Y.T, X) # n_classes * n_features

feature_count = check_array(X.sum(axis=0))
class_prob = check_array(Y.mean(axis=0))
feature_count = X.sum(axis=0).reshape(1, -1)
class_prob = Y.mean(axis=0).reshape(1, -1)
expected = np.dot(class_prob.T, feature_count)

return _chisquare(observed, expected)
Expand Down
30 changes: 15 additions & 15 deletions sklearn/gaussian_process/gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,9 +769,9 @@ def minus_reduced_likelihood_function(log10t):
# Initialize under isotropy assumption
if verbose:
print("Initialize under isotropy assumption...")
self.theta0 = check_array(self.theta0.min())
self.thetaL = check_array(self.thetaL.min())
self.thetaU = check_array(self.thetaU.max())
self.theta0 = np.atleast_2d(self.theta0.min())
self.thetaL = np.atleast_2d(self.thetaL.min())
self.thetaU = np.atleast_2d(self.thetaU.max())
theta_iso, optimal_rlf_value_iso, par_iso = \
self._arg_max_reduced_likelihood_function()
optimal_theta = theta_iso + np.zeros(theta0.shape)
Expand All @@ -782,16 +782,16 @@ def minus_reduced_likelihood_function(log10t):
for i in self.random_state.permutation(theta0.size):
if verbose:
print("Proceeding along dimension %d..." % (i + 1))
self.theta0 = check_array(theta_iso)
self.thetaL = check_array(thetaL[0, i])
self.thetaU = check_array(thetaU[0, i])
self.theta0 = np.atleast_2d(theta_iso)
self.thetaL = np.atleast_2d(thetaL[0, i])
self.thetaU = np.atleast_2d(thetaU[0, i])

def corr_cut(t, d):
return corr(check_array(np.hstack([optimal_theta[0][0:i],
t[0],
optimal_theta[0][(i +
1)::]])),
d)
return corr(
np.atleast_2d(
np.hstack([optimal_theta[0][0:i], t[0],
optimal_theta[0][(i + 1)::]])),
d)

self.corr = corr_cut
optimal_theta[0, i], optimal_rlf_value, optimal_par = \
Expand Down Expand Up @@ -824,7 +824,7 @@ def _check_params(self, n_samples=None):

# Check regression weights if given (Ordinary Kriging)
if self.beta0 is not None:
self.beta0 = check_array(self.beta0)
self.beta0 = np.atleast_2d(self.beta0)
if self.beta0.shape[1] != 1:
# Force to column vector
self.beta0 = self.beta0.T
Expand All @@ -844,12 +844,12 @@ def _check_params(self, n_samples=None):
"'light', %s was given." % self.storage_mode)

# Check correlation parameters
self.theta0 = check_array(self.theta0)
self.theta0 = np.atleast_2d(self.theta0)
lth = self.theta0.size

if self.thetaL is not None and self.thetaU is not None:
self.thetaL = check_array(self.thetaL)
self.thetaU = check_array(self.thetaU)
self.thetaL = np.atleast_2d(self.thetaL)
self.thetaU = np.atleast_2d(self.thetaU)
if self.thetaL.size != lth or self.thetaU.size != lth:
raise ValueError("theta0, thetaL and thetaU must have the "
"same length.")
Expand Down
39 changes: 20 additions & 19 deletions sklearn/linear_model/tests/test_sgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def test_late_onset_averaging_reached(self):
class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest):
"""Test suite for the dense representation variant of SGD"""
factory_class = SGDClassifier

def test_sgd(self):
# Check that SGD gives any results :-)

Expand Down Expand Up @@ -390,7 +391,7 @@ def test_sgd_multiclass(self):
clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2)
assert_equal(clf.coef_.shape, (3, 2))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
pred = clf.predict(T2)
assert_array_equal(pred, true_result2)

Expand Down Expand Up @@ -432,7 +433,7 @@ def test_sgd_multiclass_njobs(self):
clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2)
assert_equal(clf.coef_.shape, (3, 2))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
pred = clf.predict(T2)
assert_array_equal(pred, true_result2)

Expand Down Expand Up @@ -469,14 +470,14 @@ def test_sgd_proba(self):
for loss in ["log", "modified_huber"]:
clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
clf.fit(X, Y)
p = clf.predict_proba([3, 2])
p = clf.predict_proba([[3, 2]])
assert_true(p[0, 1] > 0.5)
p = clf.predict_proba([-1, -1])
p = clf.predict_proba([[-1, -1]])
assert_true(p[0, 1] < 0.5)

p = clf.predict_log_proba([3, 2])
p = clf.predict_log_proba([[3, 2]])
assert_true(p[0, 1] > p[0, 0])
p = clf.predict_log_proba([-1, -1])
p = clf.predict_log_proba([[-1, -1]])
assert_true(p[0, 1] < p[0, 0])

# log loss multiclass probability estimates
Expand All @@ -488,25 +489,25 @@ def test_sgd_proba(self):
assert_almost_equal(p[0].sum(), 1)
assert_true(np.all(p[0] >= 0))

p = clf.predict_proba([-1, -1])
d = clf.decision_function([-1, -1])
p = clf.predict_proba([[-1, -1]])
d = clf.decision_function([[-1, -1]])
assert_array_equal(np.argsort(p[0]), np.argsort(d[0]))

l = clf.predict_log_proba([3, 2])
p = clf.predict_proba([3, 2])
l = clf.predict_log_proba([[3, 2]])
p = clf.predict_proba([[3, 2]])
assert_array_almost_equal(np.log(p), l)

l = clf.predict_log_proba([-1, -1])
p = clf.predict_proba([-1, -1])
l = clf.predict_log_proba([[-1, -1]])
p = clf.predict_proba([[-1, -1]])
assert_array_almost_equal(np.log(p), l)

# Modified Huber multiclass probability estimates; requires a separate
# test because the hard zero/one probabilities may destroy the
# ordering present in decision_function output.
clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
clf.fit(X2, Y2)
d = clf.decision_function([3, 2])
p = clf.predict_proba([3, 2])
d = clf.decision_function([[3, 2]])
p = clf.predict_proba([[3, 2]])
if not isinstance(self, SparseSGDClassifierTestCase):
assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1))
else: # XXX the sparse test gets a different X2 (?)
Expand All @@ -516,9 +517,9 @@ def test_sgd_proba(self):
# which would cause naive normalization to fail (see comment
# in SGDClassifier.predict_proba)
x = X.mean(axis=0)
d = clf.decision_function(x)
d = clf.decision_function([x])
if np.all(d < -1): # XXX not true in sparse test case (why?)
p = clf.predict_proba(x)
p = clf.predict_proba([x])
assert_array_almost_equal(p[0], [1 / 3.] * 3)

def test_sgd_l1(self):
Expand Down Expand Up @@ -703,7 +704,7 @@ def test_partial_fit_binary(self):
clf.partial_fit(X[:third], Y[:third], classes=classes)
assert_equal(clf.coef_.shape, (1, X.shape[1]))
assert_equal(clf.intercept_.shape, (1,))
assert_equal(clf.decision_function([0, 0]).shape, (1, ))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, ))
id1 = id(clf.coef_.data)

clf.partial_fit(X[third:], Y[third:])
Expand All @@ -722,7 +723,7 @@ def test_partial_fit_multiclass(self):
clf.partial_fit(X2[:third], Y2[:third], classes=classes)
assert_equal(clf.coef_.shape, (3, X2.shape[1]))
assert_equal(clf.intercept_.shape, (3,))
assert_equal(clf.decision_function([0, 0]).shape, (1, 3))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3))
id1 = id(clf.coef_.data)

clf.partial_fit(X2[third:], Y2[third:])
Expand Down Expand Up @@ -1017,7 +1018,7 @@ def test_partial_fit(self):
clf.partial_fit(X[:third], Y[:third])
assert_equal(clf.coef_.shape, (X.shape[1], ))
assert_equal(clf.intercept_.shape, (1,))
assert_equal(clf.decision_function([0, 0]).shape, (1, ))
assert_equal(clf.decision_function([[0, 0]]).shape, (1, ))
id1 = id(clf.coef_.data)

clf.partial_fit(X[third:], Y[third:])
Expand Down
7 changes: 3 additions & 4 deletions sklearn/metrics/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
X, Y = check_pairwise_arrays(X, Y)

if Y_norm_squared is not None:
YY = check_array(Y_norm_squared)
YY = check_array(np.atleast_2d(Y_norm_squared))
if YY.shape != (1, Y.shape[0]):
raise ValueError(
"Incompatible dimensions for Y and Y_norm_squared")
Expand Down Expand Up @@ -602,8 +602,7 @@ def paired_cosine_distances(X, Y):
'l2': paired_euclidean_distances,
'l1': paired_manhattan_distances,
'manhattan': paired_manhattan_distances,
'cityblock': paired_manhattan_distances,
}
'cityblock': paired_manhattan_distances}


def paired_distances(X, Y, metric="euclidean", **kwds):
Expand Down Expand Up @@ -1089,7 +1088,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

"""
if (metric not in _VALID_METRICS and
not callable(metric) and metric != "precomputed"):
not callable(metric) and metric != "precomputed"):
raise ValueError("Unknown metric %s. "
"Valid metrics are %s, or 'precomputed', or a "
"callable" % (metric, _VALID_METRICS))
Expand Down
4 changes: 2 additions & 2 deletions sklearn/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
# convert to float to support sample weight consistently
Y = Y.astype(np.float64)
if sample_weight is not None:
Y *= check_array(sample_weight).T
Y *= check_array(sample_weight)

class_prior = self.class_prior

Expand Down Expand Up @@ -524,7 +524,7 @@ def fit(self, X, y, sample_weight=None):
# this means we also don't have to cast X to floating point
Y = Y.astype(np.float64)
if sample_weight is not None:
Y *= check_array(sample_weight).T
Y *= check_array(sample_weight)

class_prior = self.class_prior

Expand Down
Loading